@novastera-oss/llamarn 0.2.6 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +141 -38
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +58 -24
- package/cpp/LlamaCppModel.h +3 -3
- package/cpp/PureCppImpl.cpp +1 -1
- package/cpp/PureCppImpl.h +2 -2
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +15 -4
- package/cpp/llama.cpp/Makefile +2 -2
- package/cpp/llama.cpp/README.md +32 -13
- package/cpp/llama.cpp/common/CMakeLists.txt +10 -20
- package/cpp/llama.cpp/common/arg.cpp +37 -6
- package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
- package/cpp/llama.cpp/common/chat-parser.cpp +5 -0
- package/cpp/llama.cpp/common/chat-parser.h +2 -0
- package/cpp/llama.cpp/common/chat.cpp +12 -9
- package/cpp/llama.cpp/common/chat.h +1 -1
- package/cpp/llama.cpp/common/common.cpp +53 -40
- package/cpp/llama.cpp/common/common.h +6 -2
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/common/speculative.cpp +6 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +215 -76
- package/cpp/llama.cpp/ggml/CMakeLists.txt +48 -2
- package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +64 -13
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
- package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +124 -26
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +4 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +93 -104
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +194 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1158 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1571 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +213 -37
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +59 -37
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +90 -39
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +260 -49
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +497 -282
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1078 -468
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +20 -48
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +117 -165
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +192 -53
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +8 -105
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +209 -92
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +158 -203
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +36 -28
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +487 -247
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +69 -19
- package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +133 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +25 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +78 -3
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +141 -38
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -2
- package/cpp/llama.cpp/src/llama-arch.cpp +150 -3
- package/cpp/llama.cpp/src/llama-arch.h +25 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +736 -274
- package/cpp/llama.cpp/src/llama-batch.h +110 -57
- package/cpp/llama.cpp/src/llama-chat.cpp +30 -8
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +360 -266
- package/cpp/llama.cpp/src/llama-context.h +27 -23
- package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
- package/cpp/llama.cpp/src/llama-cparams.h +1 -1
- package/cpp/llama.cpp/src/llama-graph.cpp +411 -344
- package/cpp/llama.cpp/src/llama-graph.h +126 -58
- package/cpp/llama.cpp/src/llama-hparams.cpp +10 -2
- package/cpp/llama.cpp/src/llama-hparams.h +16 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +103 -73
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +34 -42
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +345 -221
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +75 -50
- package/cpp/llama.cpp/src/llama-kv-cells.h +51 -22
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +138 -0
- package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.cpp → llama-memory-recurrent.cpp} +302 -317
- package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.h → llama-memory-recurrent.h} +60 -68
- package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
- package/cpp/llama.cpp/src/llama-memory.h +73 -36
- package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
- package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +1630 -511
- package/cpp/llama.cpp/src/llama-model.h +26 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +89 -6
- package/cpp/llama.cpp/src/llama-vocab.cpp +58 -26
- package/cpp/llama.cpp/src/llama-vocab.h +1 -0
- package/cpp/llama.cpp/src/llama.cpp +11 -7
- package/cpp/llama.cpp/src/unicode.cpp +5 -0
- package/cpp/rn-completion.cpp +2 -2
- package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
- package/cpp/{rn-utils.hpp → rn-utils.h} +3 -0
- package/ios/include/chat.h +1 -1
- package/ios/include/common.h +6 -2
- package/ios/include/llama.h +141 -38
- package/ios/libs/llama.xcframework/Info.plist +15 -15
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3624
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4725
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4746
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3652
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -2
- package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -1
- package/cpp/llama.cpp/src/llama-kv-cache.h +0 -44
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
|
@@ -329,60 +329,51 @@ static void acc_f32_sycl(const float *x, const float *y, float *dst,
|
|
|
329
329
|
const int ne12, const int nb1, const int nb2,
|
|
330
330
|
const int offset, queue_ptr stream) {
|
|
331
331
|
int num_blocks = (n_elements + SYCL_ACC_BLOCK_SIZE - 1) / SYCL_ACC_BLOCK_SIZE;
|
|
332
|
-
stream
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
item_ct1);
|
|
339
|
-
});
|
|
332
|
+
sycl_parallel_for(stream,
|
|
333
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE),
|
|
334
|
+
sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE)),
|
|
335
|
+
[=](sycl::nd_item<3> item_ct1) {
|
|
336
|
+
acc_f32(x, y, dst, n_elements, ne10, ne11, ne12, nb1, nb2, offset, item_ct1);
|
|
337
|
+
});
|
|
340
338
|
}
|
|
341
339
|
|
|
342
340
|
template<typename T>
|
|
343
341
|
static void gelu_sycl(const T *x, T *dst, const int k,
|
|
344
342
|
queue_ptr stream) {
|
|
345
343
|
const int num_blocks = (k + SYCL_GELU_BLOCK_SIZE - 1) / SYCL_GELU_BLOCK_SIZE;
|
|
346
|
-
stream
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
351
|
-
gelu(x, dst, k, item_ct1);
|
|
352
|
-
});
|
|
344
|
+
sycl_parallel_for(stream,
|
|
345
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE),
|
|
346
|
+
sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)),
|
|
347
|
+
[=](sycl::nd_item<3> item_ct1) { gelu(x, dst, k, item_ct1); });
|
|
353
348
|
}
|
|
354
349
|
|
|
355
350
|
template<typename T>
|
|
356
351
|
static void silu_sycl(const T *x, T *dst, const int k,
|
|
357
352
|
queue_ptr stream) {
|
|
358
353
|
const int num_blocks = (k + SYCL_SILU_BLOCK_SIZE - 1) / SYCL_SILU_BLOCK_SIZE;
|
|
359
|
-
stream
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
364
|
-
silu(x, dst, k, item_ct1);
|
|
365
|
-
});
|
|
354
|
+
sycl_parallel_for(stream,
|
|
355
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SILU_BLOCK_SIZE),
|
|
356
|
+
sycl::range<3>(1, 1, SYCL_SILU_BLOCK_SIZE)),
|
|
357
|
+
[=](sycl::nd_item<3> item_ct1) { silu(x, dst, k, item_ct1); });
|
|
366
358
|
}
|
|
367
359
|
|
|
368
360
|
template<typename T>
|
|
369
361
|
static void sgn_sycl(const T * x, T * dst, const int k, queue_ptr stream) {
|
|
370
362
|
// hard code for now
|
|
371
363
|
const int num_blocks = ceil_div(k, 256);
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
});
|
|
364
|
+
sycl_parallel_for(
|
|
365
|
+
stream, sycl::nd_range<3>((sycl::range<3>(1, 1, num_blocks) * sycl::range(1, 1, 256)), sycl::range(1, 1, 256)),
|
|
366
|
+
[=](sycl::nd_item<3> item_ct1) { sgn(x, dst, k, item_ct1); });
|
|
376
367
|
}
|
|
377
368
|
|
|
378
369
|
template<typename T>
|
|
379
370
|
static void abs_sycl(const T * x, T * dst, const int k, queue_ptr stream) {
|
|
380
371
|
// hard code for now
|
|
381
372
|
const int num_blocks = ceil_div(k, 256);
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
373
|
+
sycl_parallel_for(
|
|
374
|
+
stream,
|
|
375
|
+
sycl::nd_range<3>((sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, 256)), sycl::range<3>(1, 1, 256)),
|
|
376
|
+
[=](sycl::nd_item<3> item_ct1) { abs_op(x, dst, k, item_ct1); });
|
|
386
377
|
}
|
|
387
378
|
|
|
388
379
|
|
|
@@ -390,23 +381,20 @@ template<typename T>
|
|
|
390
381
|
static void elu_sycl(const T * x, T * dst, const int k, queue_ptr stream) {
|
|
391
382
|
// hard code for now
|
|
392
383
|
const int num_blocks = ceil_div(k, 256);
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
384
|
+
sycl_parallel_for(
|
|
385
|
+
stream,
|
|
386
|
+
sycl::nd_range<3>((sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, 256)), sycl::range<3>(1, 1, 256)),
|
|
387
|
+
[=](sycl::nd_item<3> item_ct1) { elu_op(x, dst, k, item_ct1); });
|
|
397
388
|
}
|
|
398
389
|
|
|
399
390
|
template<typename T>
|
|
400
391
|
static void gelu_quick_sycl(const T *x, T *dst, const int k,
|
|
401
392
|
queue_ptr stream) {
|
|
402
393
|
const int num_blocks = (k + SYCL_GELU_BLOCK_SIZE - 1) / SYCL_GELU_BLOCK_SIZE;
|
|
403
|
-
stream
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
408
|
-
gelu_quick(x, dst, k, item_ct1);
|
|
409
|
-
});
|
|
394
|
+
sycl_parallel_for(stream,
|
|
395
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE),
|
|
396
|
+
sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)),
|
|
397
|
+
[=](sycl::nd_item<3> item_ct1) { gelu_quick(x, dst, k, item_ct1); });
|
|
410
398
|
}
|
|
411
399
|
|
|
412
400
|
|
|
@@ -414,169 +402,133 @@ template<typename T>
|
|
|
414
402
|
static void gelu_erf_sycl(const T *x, T *dst, const int k,
|
|
415
403
|
queue_ptr stream) {
|
|
416
404
|
const int num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE);
|
|
417
|
-
stream
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
422
|
-
gelu_erf(x, dst, k, item_ct1);
|
|
423
|
-
});
|
|
405
|
+
sycl_parallel_for(stream,
|
|
406
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE),
|
|
407
|
+
sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)),
|
|
408
|
+
[=](sycl::nd_item<3> item_ct1) { gelu_erf(x, dst, k, item_ct1); });
|
|
424
409
|
}
|
|
425
410
|
|
|
426
411
|
template<typename T>
|
|
427
412
|
static void tanh_sycl(const T *x, T *dst, const int k,
|
|
428
413
|
queue_ptr stream) {
|
|
429
414
|
const int num_blocks = (k + SYCL_TANH_BLOCK_SIZE - 1) / SYCL_TANH_BLOCK_SIZE;
|
|
430
|
-
stream
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
435
|
-
tanh(x, dst, k, item_ct1);
|
|
436
|
-
});
|
|
415
|
+
sycl_parallel_for(stream,
|
|
416
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_TANH_BLOCK_SIZE),
|
|
417
|
+
sycl::range<3>(1, 1, SYCL_TANH_BLOCK_SIZE)),
|
|
418
|
+
[=](sycl::nd_item<3> item_ct1) { tanh(x, dst, k, item_ct1); });
|
|
437
419
|
}
|
|
438
420
|
|
|
439
421
|
template<typename T>
|
|
440
422
|
static void relu_sycl(const T *x, T *dst, const int k,
|
|
441
423
|
queue_ptr stream) {
|
|
442
424
|
const int num_blocks = (k + SYCL_RELU_BLOCK_SIZE - 1) / SYCL_RELU_BLOCK_SIZE;
|
|
443
|
-
stream
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
448
|
-
relu(x, dst, k, item_ct1);
|
|
449
|
-
});
|
|
425
|
+
sycl_parallel_for(stream,
|
|
426
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE),
|
|
427
|
+
sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE)),
|
|
428
|
+
[=](sycl::nd_item<3> item_ct1) { relu(x, dst, k, item_ct1); });
|
|
450
429
|
}
|
|
451
430
|
|
|
452
431
|
template<typename T>
|
|
453
432
|
static void hardsigmoid_sycl(const T *x, T *dst, const int k,
|
|
454
433
|
queue_ptr stream) {
|
|
455
434
|
const int num_blocks = (k + SYCL_HARDSIGMOID_BLOCK_SIZE - 1) / SYCL_HARDSIGMOID_BLOCK_SIZE;
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
435
|
+
sycl_parallel_for(
|
|
436
|
+
stream,
|
|
437
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_HARDSIGMOID_BLOCK_SIZE),
|
|
459
438
|
sycl::range<3>(1, 1, SYCL_HARDSIGMOID_BLOCK_SIZE)),
|
|
460
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
461
|
-
hardsigmoid(x, dst, k, item_ct1);
|
|
462
|
-
});
|
|
439
|
+
[=](sycl::nd_item<3> item_ct1) { hardsigmoid(x, dst, k, item_ct1); });
|
|
463
440
|
}
|
|
464
441
|
|
|
465
442
|
template<typename T>
|
|
466
443
|
static void hardswish_sycl(const T *x, T *dst, const int k,
|
|
467
444
|
queue_ptr stream) {
|
|
468
445
|
const int num_blocks = (k + SYCL_HARDSWISH_BLOCK_SIZE - 1) / SYCL_HARDSWISH_BLOCK_SIZE;
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
446
|
+
sycl_parallel_for(
|
|
447
|
+
stream,
|
|
448
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_HARDSWISH_BLOCK_SIZE),
|
|
472
449
|
sycl::range<3>(1, 1, SYCL_HARDSWISH_BLOCK_SIZE)),
|
|
473
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
474
|
-
hardswish(x, dst, k, item_ct1);
|
|
475
|
-
});
|
|
450
|
+
[=](sycl::nd_item<3> item_ct1) { hardswish(x, dst, k, item_ct1); });
|
|
476
451
|
}
|
|
477
452
|
|
|
478
453
|
template<typename T>
|
|
479
454
|
static void exp_sycl(const T *x, T *dst, const int k,
|
|
480
455
|
queue_ptr stream) {
|
|
481
456
|
const int num_blocks = (k + SYCL_EXP_BLOCK_SIZE - 1) / SYCL_EXP_BLOCK_SIZE;
|
|
482
|
-
stream
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
487
|
-
exp(x, dst, k, item_ct1);
|
|
488
|
-
});
|
|
457
|
+
sycl_parallel_for(stream,
|
|
458
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE),
|
|
459
|
+
sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE)),
|
|
460
|
+
[=](sycl::nd_item<3> item_ct1) { exp(x, dst, k, item_ct1); });
|
|
489
461
|
}
|
|
490
462
|
|
|
491
463
|
template<typename T>
|
|
492
464
|
static void log_sycl(const T *x, T *dst, const int k,
|
|
493
465
|
queue_ptr stream) {
|
|
494
466
|
const int num_blocks = (k + SYCL_EXP_BLOCK_SIZE - 1) / SYCL_EXP_BLOCK_SIZE;
|
|
495
|
-
stream
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
500
|
-
log(x, dst, k, item_ct1);
|
|
501
|
-
});
|
|
467
|
+
sycl_parallel_for(stream,
|
|
468
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE),
|
|
469
|
+
sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE)),
|
|
470
|
+
[=](sycl::nd_item<3> item_ct1) { log(x, dst, k, item_ct1); });
|
|
502
471
|
}
|
|
503
472
|
|
|
504
473
|
template<typename T>
|
|
505
474
|
static void neg_sycl(const T *x, T *dst, const int k,
|
|
506
475
|
queue_ptr stream) {
|
|
507
476
|
const int num_blocks = (k + SYCL_NEG_BLOCK_SIZE - 1) / SYCL_NEG_BLOCK_SIZE;
|
|
508
|
-
stream
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
513
|
-
neg(x, dst, k, item_ct1);
|
|
514
|
-
});
|
|
477
|
+
sycl_parallel_for(stream,
|
|
478
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE),
|
|
479
|
+
sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE)),
|
|
480
|
+
[=](sycl::nd_item<3> item_ct1) { neg(x, dst, k, item_ct1); });
|
|
515
481
|
}
|
|
516
482
|
|
|
517
483
|
template<typename T>
|
|
518
484
|
static void step_sycl(const T *x, T *dst, const int k,
|
|
519
485
|
queue_ptr stream) {
|
|
520
486
|
const int num_blocks = (k + SYCL_NEG_BLOCK_SIZE - 1) / SYCL_NEG_BLOCK_SIZE;
|
|
521
|
-
stream
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
526
|
-
step(x, dst, k, item_ct1);
|
|
527
|
-
});
|
|
487
|
+
sycl_parallel_for(stream,
|
|
488
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE),
|
|
489
|
+
sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE)),
|
|
490
|
+
[=](sycl::nd_item<3> item_ct1) { step(x, dst, k, item_ct1); });
|
|
528
491
|
}
|
|
529
492
|
|
|
530
493
|
template<typename T>
|
|
531
494
|
static void sigmoid_sycl(const T *x, T *dst, const int k,
|
|
532
495
|
queue_ptr stream) {
|
|
533
496
|
const int num_blocks = (k + SYCL_SIGMOID_BLOCK_SIZE - 1) / SYCL_SIGMOID_BLOCK_SIZE;
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
497
|
+
sycl_parallel_for(
|
|
498
|
+
stream,
|
|
499
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SIGMOID_BLOCK_SIZE),
|
|
537
500
|
sycl::range<3>(1, 1, SYCL_SIGMOID_BLOCK_SIZE)),
|
|
538
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
539
|
-
sigmoid(x, dst, k, item_ct1);
|
|
540
|
-
});
|
|
501
|
+
[=](sycl::nd_item<3> item_ct1) { sigmoid(x, dst, k, item_ct1); });
|
|
541
502
|
}
|
|
542
503
|
|
|
543
504
|
template<typename T>
|
|
544
505
|
static void sqrt_sycl(const T *x, T *dst, const int k,
|
|
545
506
|
queue_ptr stream) {
|
|
546
507
|
const int num_blocks = (k + SYCL_SQRT_BLOCK_SIZE - 1) / SYCL_SQRT_BLOCK_SIZE;
|
|
547
|
-
stream
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
552
|
-
sqrt(x, dst, k, item_ct1);
|
|
553
|
-
});
|
|
508
|
+
sycl_parallel_for(stream,
|
|
509
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SQRT_BLOCK_SIZE),
|
|
510
|
+
sycl::range<3>(1, 1, SYCL_SQRT_BLOCK_SIZE)),
|
|
511
|
+
[=](sycl::nd_item<3> item_ct1) { sqrt(x, dst, k, item_ct1); });
|
|
554
512
|
}
|
|
555
513
|
|
|
556
514
|
template<typename T>
|
|
557
515
|
static void sin_sycl(const T *x, T *dst, const int k,
|
|
558
516
|
queue_ptr stream) {
|
|
559
517
|
const int num_blocks = (k + SYCL_SIN_BLOCK_SIZE - 1) / SYCL_SIN_BLOCK_SIZE;
|
|
560
|
-
stream
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
565
|
-
sin(x, dst, k, item_ct1);
|
|
566
|
-
});
|
|
518
|
+
sycl_parallel_for(stream,
|
|
519
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE),
|
|
520
|
+
sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE)),
|
|
521
|
+
[=](sycl::nd_item<3> item_ct1) { sin(x, dst, k, item_ct1); });
|
|
567
522
|
}
|
|
568
523
|
|
|
569
524
|
template<typename T>
|
|
570
525
|
static void cos_sycl(const T *x, T *dst, const int k,
|
|
571
526
|
queue_ptr stream) {
|
|
572
527
|
const int num_blocks = (k + SYCL_SIN_BLOCK_SIZE - 1) / SYCL_SIN_BLOCK_SIZE;
|
|
573
|
-
stream
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
578
|
-
cos(x, dst, k, item_ct1);
|
|
579
|
-
});
|
|
528
|
+
sycl_parallel_for(stream,
|
|
529
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE),
|
|
530
|
+
sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE)),
|
|
531
|
+
[=](sycl::nd_item<3> item_ct1) { cos(x, dst, k, item_ct1); });
|
|
580
532
|
}
|
|
581
533
|
|
|
582
534
|
template<typename T>
|
|
@@ -584,26 +536,20 @@ static void leaky_relu_sycl(const T *x, T *dst, const int k,
|
|
|
584
536
|
const float negative_slope,
|
|
585
537
|
queue_ptr stream) {
|
|
586
538
|
const int num_blocks = (k + SYCL_RELU_BLOCK_SIZE - 1) / SYCL_RELU_BLOCK_SIZE;
|
|
587
|
-
stream
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
592
|
-
leaky_relu(x, dst, k, negative_slope, item_ct1);
|
|
593
|
-
});
|
|
539
|
+
sycl_parallel_for(stream,
|
|
540
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE),
|
|
541
|
+
sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE)),
|
|
542
|
+
[=](sycl::nd_item<3> item_ct1) { leaky_relu(x, dst, k, negative_slope, item_ct1); });
|
|
594
543
|
}
|
|
595
544
|
|
|
596
545
|
template<typename T>
|
|
597
546
|
static void sqr_sycl(const T *x, T *dst, const int k,
|
|
598
547
|
queue_ptr stream) {
|
|
599
548
|
const int num_blocks = (k + SYCL_SQR_BLOCK_SIZE - 1) / SYCL_SQR_BLOCK_SIZE;
|
|
600
|
-
stream
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
605
|
-
sqr(x, dst, k, item_ct1);
|
|
606
|
-
});
|
|
549
|
+
sycl_parallel_for(stream,
|
|
550
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SQR_BLOCK_SIZE),
|
|
551
|
+
sycl::range<3>(1, 1, SYCL_SQR_BLOCK_SIZE)),
|
|
552
|
+
[=](sycl::nd_item<3> item_ct1) { sqr(x, dst, k, item_ct1); });
|
|
607
553
|
}
|
|
608
554
|
|
|
609
555
|
template<typename T>
|
|
@@ -614,9 +560,8 @@ static void upscale_sycl(const T *x, T *dst, const int nb00, const int nb01,
|
|
|
614
560
|
int dst_size = ne10 * ne11 * ne12 * ne13;
|
|
615
561
|
int num_blocks = (dst_size + SYCL_UPSCALE_BLOCK_SIZE - 1) / SYCL_UPSCALE_BLOCK_SIZE;
|
|
616
562
|
sycl::range<1> gridDim(num_blocks * SYCL_UPSCALE_BLOCK_SIZE);
|
|
617
|
-
|
|
618
|
-
sycl::nd_range<1>(gridDim, sycl::range<1>(SYCL_UPSCALE_BLOCK_SIZE)),
|
|
619
|
-
[=](sycl::nd_item<1> item_ct1) {
|
|
563
|
+
sycl_parallel_for<1>(
|
|
564
|
+
stream, sycl::nd_range<1>(gridDim, sycl::range<1>(SYCL_UPSCALE_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
|
|
620
565
|
upscale(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3, item_ct1);
|
|
621
566
|
});
|
|
622
567
|
}
|
|
@@ -627,12 +572,10 @@ static void pad_sycl(const T *x, T *dst, const int ne00,
|
|
|
627
572
|
const int ne1, const int ne2, queue_ptr stream) {
|
|
628
573
|
int num_blocks = (ne0 + SYCL_PAD_BLOCK_SIZE - 1) / SYCL_PAD_BLOCK_SIZE;
|
|
629
574
|
sycl::range<3> gridDim(ne2, ne1, num_blocks);
|
|
630
|
-
stream
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
pad(x, dst, ne0, ne00, ne01, ne02, item_ct1);
|
|
635
|
-
});
|
|
575
|
+
sycl_parallel_for(stream,
|
|
576
|
+
sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE),
|
|
577
|
+
sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE)),
|
|
578
|
+
[=](sycl::nd_item<3> item_ct1) { pad(x, dst, ne0, ne00, ne01, ne02, item_ct1); });
|
|
636
579
|
}
|
|
637
580
|
|
|
638
581
|
template<typename T>
|
|
@@ -640,13 +583,10 @@ static void clamp_sycl(const T *x, T *dst, const float min,
|
|
|
640
583
|
const float max, const int k,
|
|
641
584
|
queue_ptr stream) {
|
|
642
585
|
const int num_blocks = (k + SYCL_CLAMP_BLOCK_SIZE - 1) / SYCL_CLAMP_BLOCK_SIZE;
|
|
643
|
-
stream
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
648
|
-
clamp(x, dst, min, max, k, item_ct1);
|
|
649
|
-
});
|
|
586
|
+
sycl_parallel_for(stream,
|
|
587
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CLAMP_BLOCK_SIZE),
|
|
588
|
+
sycl::range<3>(1, 1, SYCL_CLAMP_BLOCK_SIZE)),
|
|
589
|
+
[=](sycl::nd_item<3> item_ct1) { clamp(x, dst, min, max, k, item_ct1); });
|
|
650
590
|
}
|
|
651
591
|
|
|
652
592
|
inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
@@ -65,6 +65,9 @@ public:
|
|
|
65
65
|
|
|
66
66
|
dnnl::primitive_attr primitive_attr;
|
|
67
67
|
primitive_attr.set_scratchpad_mode(dnnl::scratchpad_mode::user);
|
|
68
|
+
#ifdef GGML_SYCL_F16
|
|
69
|
+
primitive_attr.set_fpmath_mode(dnnl::fpmath_mode::f16);
|
|
70
|
+
#endif
|
|
68
71
|
|
|
69
72
|
auto a_mem = dnnl::memory(a_in_md, eng, const_cast<void*>(a));
|
|
70
73
|
auto b_mem = dnnl::memory(b_in_md, eng, const_cast<void*>(b));
|
|
@@ -60,54 +60,6 @@ static void k_get_rows(
|
|
|
60
60
|
dst_row[iybs + iqs + y_offset] = v.y();
|
|
61
61
|
}
|
|
62
62
|
|
|
63
|
-
template<int qk, int qr, dequantize_kernel_t_reorder dequantize_kernel_recorder, typename dst_t>
|
|
64
|
-
static void k_get_rows_reorder(
|
|
65
|
-
const void * src0, const void *src0_dq, const int32_t * src1, dst_t * dst,
|
|
66
|
-
int64_t ne00, /*int64_t ne01, int64_t ne02, int64_t ne03,*/
|
|
67
|
-
/*int64_t ne10, int64_t ne11,*/ int64_t ne12, /*int64_t ne13,*/
|
|
68
|
-
/*size_t s0,*/ size_t s1, size_t s2, size_t s3,
|
|
69
|
-
/*size_t nb00,*/ size_t nb01, size_t nb02, size_t nb03,
|
|
70
|
-
size_t s10, size_t s11, size_t s12,
|
|
71
|
-
const sycl::nd_item<3> &item_ct1/*, size_t s13*/) {
|
|
72
|
-
|
|
73
|
-
const int i00 = (item_ct1.get_group(2) * item_ct1.get_local_range(2) +
|
|
74
|
-
item_ct1.get_local_id(2)) *
|
|
75
|
-
2;
|
|
76
|
-
const int i10 = item_ct1.get_local_range(1) * item_ct1.get_group(1) +
|
|
77
|
-
item_ct1.get_local_id(1);
|
|
78
|
-
const int i11 = (item_ct1.get_group(0) * item_ct1.get_local_range(0) +
|
|
79
|
-
item_ct1.get_local_id(0)) /
|
|
80
|
-
ne12;
|
|
81
|
-
const int i12 = (item_ct1.get_group(0) * item_ct1.get_local_range(0) +
|
|
82
|
-
item_ct1.get_local_id(0)) %
|
|
83
|
-
ne12;
|
|
84
|
-
|
|
85
|
-
if (i00 >= ne00) {
|
|
86
|
-
return;
|
|
87
|
-
}
|
|
88
|
-
auto ncols = ne00;
|
|
89
|
-
const int i01 = src1[i10*s10 + i11*s11 + i12*s12];
|
|
90
|
-
|
|
91
|
-
dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3;
|
|
92
|
-
|
|
93
|
-
const int src0_off = i01 * ncols + i00;
|
|
94
|
-
const int ib = src0_off / QK4_0; // block index
|
|
95
|
-
const int iqs = (i00%qk)/qr; // x quant index
|
|
96
|
-
const int iybs = i00 - i00%qk; // dst block start index
|
|
97
|
-
const int y_offset = qr == 1 ? 1 : qk/2;
|
|
98
|
-
|
|
99
|
-
// dequantize
|
|
100
|
-
dfloat2 v;
|
|
101
|
-
dequantize_kernel_recorder((const void *)src0_dq, ib, (const void *)src0, src0_off/2, v);
|
|
102
|
-
|
|
103
|
-
dst_row[iybs + iqs + 0] = v.x();
|
|
104
|
-
dst_row[iybs + iqs + y_offset] = v.y();
|
|
105
|
-
|
|
106
|
-
GGML_UNUSED(nb01);
|
|
107
|
-
GGML_UNUSED(nb02);
|
|
108
|
-
GGML_UNUSED(nb03);
|
|
109
|
-
}
|
|
110
|
-
|
|
111
63
|
template<typename src0_t, typename dst_t>
|
|
112
64
|
static void k_get_rows_float(
|
|
113
65
|
const src0_t * src0, const int32_t * src1, dst_t * dst,
|
|
@@ -166,58 +118,15 @@ static void get_rows_sycl(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
|
|
|
166
118
|
|
|
167
119
|
GGML_ASSERT(ne00 % 2 == 0);
|
|
168
120
|
|
|
169
|
-
stream
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
s3, nb01, nb02, nb03, s10, s11, s12, item_ct1);
|
|
174
|
-
});
|
|
175
|
-
|
|
176
|
-
GGML_UNUSED(dst);
|
|
177
|
-
GGML_UNUSED(ctx);
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
template <int qk, int qr, dequantize_kernel_t_reorder dq_reorder>
|
|
181
|
-
static void get_rows_sycl_reorder(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
|
|
182
|
-
ggml_tensor *dst, const void *src0_dd,
|
|
183
|
-
const int32_t *src1_dd, float *dst_dd,
|
|
184
|
-
queue_ptr stream) {
|
|
185
|
-
|
|
186
|
-
GGML_TENSOR_BINARY_OP_LOCALS
|
|
187
|
-
|
|
188
|
-
const sycl::range<3> block_dims(1, 1, SYCL_GET_ROWS_BLOCK_SIZE);
|
|
189
|
-
const int block_num_x = (ne00 + 2*SYCL_GET_ROWS_BLOCK_SIZE - 1) / (2*SYCL_GET_ROWS_BLOCK_SIZE);
|
|
190
|
-
const sycl::range<3> block_nums(ne11 * ne12, ne10, block_num_x);
|
|
191
|
-
|
|
192
|
-
// strides in elements
|
|
193
|
-
//const size_t s0 = nb0 / ggml_element_size(dst);
|
|
194
|
-
const size_t s1 = nb1 / ggml_element_size(dst);
|
|
195
|
-
const size_t s2 = nb2 / ggml_element_size(dst);
|
|
196
|
-
const size_t s3 = nb3 / ggml_element_size(dst);
|
|
197
|
-
|
|
198
|
-
const size_t s10 = nb10 / ggml_element_size(src1);
|
|
199
|
-
const size_t s11 = nb11 / ggml_element_size(src1);
|
|
200
|
-
const size_t s12 = nb12 / ggml_element_size(src1);
|
|
201
|
-
//const size_t s13 = nb13 / ggml_element_size(src1);
|
|
202
|
-
|
|
203
|
-
GGML_ASSERT(ne00 % 2 == 0);
|
|
204
|
-
|
|
205
|
-
const uint8_t* src0_q = (const uint8_t*)src0_dd;
|
|
206
|
-
const size_t ncols = ne00;
|
|
207
|
-
const size_t nrows = ne01;
|
|
208
|
-
const sycl::half* src0_dq = (const sycl::half*)(src0_q + nrows * ncols / 2);
|
|
209
|
-
stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
210
|
-
[=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]]{
|
|
211
|
-
k_get_rows_reorder<qk, qr, dq_reorder>(
|
|
212
|
-
src0_dd, src0_dq, src1_dd, dst_dd, ne00, ne12, s1, s2,
|
|
213
|
-
s3, nb01, nb02, nb03, s10, s11, s12, item_ct1);
|
|
214
|
-
});
|
|
121
|
+
sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
122
|
+
k_get_rows<qk, qr, dq>(src0_dd, src1_dd, dst_dd, ne00, ne12, s1, s2, s3, nb01, nb02, nb03, s10, s11, s12,
|
|
123
|
+
item_ct1);
|
|
124
|
+
});
|
|
215
125
|
|
|
216
126
|
GGML_UNUSED(dst);
|
|
217
127
|
GGML_UNUSED(ctx);
|
|
218
128
|
}
|
|
219
129
|
|
|
220
|
-
|
|
221
130
|
template <typename src0_t>
|
|
222
131
|
static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
223
132
|
const ggml_tensor *src1, ggml_tensor *dst,
|
|
@@ -245,9 +154,8 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens
|
|
|
245
154
|
dpct::has_capability_or_fail(stream->get_device(),
|
|
246
155
|
{sycl::aspect::fp16});
|
|
247
156
|
|
|
248
|
-
|
|
249
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
250
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
157
|
+
sycl_parallel_for(
|
|
158
|
+
stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
251
159
|
k_get_rows_float(src0_dd, src1_dd, dst_dd, ne00, ne12, s1, s2,
|
|
252
160
|
s3, nb01, nb02, nb03, s10, s11, s12, item_ct1);
|
|
253
161
|
});
|
|
@@ -277,13 +185,8 @@ void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
|
277
185
|
src1_i32, (float *)dst->data, ctx.stream());
|
|
278
186
|
break;
|
|
279
187
|
case GGML_TYPE_Q4_0:
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
src1_i32, (float *)dst->data, ctx.stream());
|
|
283
|
-
} else {
|
|
284
|
-
get_rows_sycl<QK4_0, QR4_0, dequantize_q4_0>(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data,
|
|
285
|
-
src1_i32, (float *)dst->data, ctx.stream());
|
|
286
|
-
}
|
|
188
|
+
get_rows_sycl<QK4_0, QR4_0, dequantize_q4_0>(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data,
|
|
189
|
+
src1_i32, (float *)dst->data, ctx.stream());
|
|
287
190
|
break;
|
|
288
191
|
case GGML_TYPE_Q4_1:
|
|
289
192
|
get_rows_sycl<QK4_1, QR4_1, dequantize_q4_1>(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data,
|