@novastera-oss/llamarn 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +8 -3
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +56 -22
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +1 -2
- package/cpp/llama.cpp/README.md +4 -5
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +24 -0
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +3 -0
- package/cpp/llama.cpp/common/common.h +5 -0
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/convert_hf_to_gguf.py +860 -23
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
- package/cpp/llama.cpp/ggml/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +206 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -1
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +37 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +111 -103
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1405 -240
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +212 -34
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +35 -11
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +187 -54
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +71 -29
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +12 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +269 -110
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +125 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +51 -9
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +394 -80
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +616 -239
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +741 -571
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +697 -1098
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +104 -62
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +39 -38
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +767 -292
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
- package/cpp/llama.cpp/ggml/src/ggml.c +449 -72
- package/cpp/llama.cpp/ggml/src/gguf.cpp +13 -2
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +285 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +27 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +137 -21
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +109 -7
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +8 -43
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +265 -3
- package/cpp/llama.cpp/src/llama-arch.h +36 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +596 -359
- package/cpp/llama.cpp/src/llama-batch.h +105 -70
- package/cpp/llama.cpp/src/llama-chat.cpp +26 -6
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +101 -107
- package/cpp/llama.cpp/src/llama-context.h +13 -13
- package/cpp/llama.cpp/src/llama-graph.cpp +286 -404
- package/cpp/llama.cpp/src/llama-graph.h +78 -79
- package/cpp/llama.cpp/src/llama-hparams.cpp +11 -1
- package/cpp/llama.cpp/src/llama-hparams.h +11 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +74 -66
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +23 -26
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +312 -157
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +79 -46
- package/cpp/llama.cpp/src/llama-kv-cells.h +97 -21
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +73 -69
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +19 -22
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +88 -77
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +21 -22
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +5301 -2922
- package/cpp/llama.cpp/src/llama-model.h +40 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +88 -5
- package/cpp/llama.cpp/src/llama-vocab.cpp +37 -3
- package/cpp/llama.cpp/src/llama-vocab.h +42 -0
- package/cpp/rn-utils.h +3 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +5 -0
- package/ios/include/llama.h +8 -43
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3744
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5095 -4900
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4871
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3773
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -3,27 +3,30 @@
|
|
|
3
3
|
|
|
4
4
|
#include "common.hpp"
|
|
5
5
|
#include "ggml.h"
|
|
6
|
-
#include <limits
|
|
6
|
+
#include <limits> // For std::numeric_limits
|
|
7
7
|
|
|
8
8
|
template <typename T>
|
|
9
9
|
T neg_infinity() {
|
|
10
10
|
return -std::numeric_limits<T>::infinity();
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
template<typename
|
|
13
|
+
template<typename T_Dst, typename T_Src = T_Dst>
|
|
14
14
|
struct typed_data {
|
|
15
|
-
const
|
|
16
|
-
|
|
15
|
+
const T_Src * src;
|
|
16
|
+
T_Dst * dst;
|
|
17
17
|
};
|
|
18
18
|
|
|
19
|
-
template<typename
|
|
20
|
-
typed_data<
|
|
19
|
+
template<typename T_Dst, typename T_Src = T_Dst>
|
|
20
|
+
typed_data<T_Dst, T_Src> cast_data(ggml_tensor * dst) {
|
|
21
21
|
return {
|
|
22
|
-
/* .src = */ static_cast<const
|
|
23
|
-
/* .dst = */ static_cast<
|
|
22
|
+
/* .src = */ static_cast<const T_Src *>(dst->src[0]->data),
|
|
23
|
+
/* .dst = */ static_cast<T_Dst *>(dst->data)
|
|
24
24
|
};
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
const float GELU_QUICK_COEF = -1.702f;
|
|
28
|
+
|
|
29
|
+
|
|
27
30
|
void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
28
31
|
|
|
29
32
|
void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
@@ -73,5 +76,11 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
|
73
76
|
void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
74
77
|
|
|
75
78
|
void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
76
|
-
#endif // GGML_SYCL_ELEMENTWISE_HPP
|
|
77
79
|
|
|
80
|
+
void ggml_sycl_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
81
|
+
void ggml_sycl_reglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
82
|
+
void ggml_sycl_swiglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
83
|
+
void ggml_sycl_geglu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
84
|
+
void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
85
|
+
|
|
86
|
+
#endif // GGML_SYCL_ELEMENTWISE_HPP
|
|
@@ -118,12 +118,10 @@ static void get_rows_sycl(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
|
|
|
118
118
|
|
|
119
119
|
GGML_ASSERT(ne00 % 2 == 0);
|
|
120
120
|
|
|
121
|
-
stream
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
s3, nb01, nb02, nb03, s10, s11, s12, item_ct1);
|
|
126
|
-
});
|
|
121
|
+
sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
122
|
+
k_get_rows<qk, qr, dq>(src0_dd, src1_dd, dst_dd, ne00, ne12, s1, s2, s3, nb01, nb02, nb03, s10, s11, s12,
|
|
123
|
+
item_ct1);
|
|
124
|
+
});
|
|
127
125
|
|
|
128
126
|
GGML_UNUSED(dst);
|
|
129
127
|
GGML_UNUSED(ctx);
|
|
@@ -156,9 +154,8 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens
|
|
|
156
154
|
dpct::has_capability_or_fail(stream->get_device(),
|
|
157
155
|
{sycl::aspect::fp16});
|
|
158
156
|
|
|
159
|
-
|
|
160
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
161
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
157
|
+
sycl_parallel_for(
|
|
158
|
+
stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
162
159
|
k_get_rows_float(src0_dd, src1_dd, dst_dd, ne00, ne12, s1, s2,
|
|
163
160
|
s3, nb01, nb02, nb03, s10, s11, s12, item_ct1);
|
|
164
161
|
});
|
|
@@ -41,6 +41,7 @@
|
|
|
41
41
|
#include "ggml-sycl/element_wise.hpp"
|
|
42
42
|
#include "ggml-sycl/presets.hpp"
|
|
43
43
|
#include "ggml-sycl/gemm.hpp"
|
|
44
|
+
#include "ggml-sycl/set_rows.hpp"
|
|
44
45
|
#include "ggml-sycl/sycl_hw.hpp"
|
|
45
46
|
#include "ggml-sycl/getrows.hpp"
|
|
46
47
|
#include "ggml.h"
|
|
@@ -83,9 +84,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
|
|
|
83
84
|
|
|
84
85
|
info.devices[i].cc =
|
|
85
86
|
100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
|
86
|
-
info.devices[i].
|
|
87
|
-
info.devices[i].opt_feature = check_gpu_optimize_feature(info.devices[i].hw_info.arch);
|
|
88
|
-
|
|
87
|
+
info.devices[i].opt_feature.reorder = device.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu);
|
|
89
88
|
info.max_work_group_sizes[i] = prop.get_max_work_group_size();
|
|
90
89
|
}
|
|
91
90
|
|
|
@@ -195,7 +194,7 @@ static void ggml_check_sycl() try {
|
|
|
195
194
|
|
|
196
195
|
if (!initialized) {
|
|
197
196
|
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
|
|
198
|
-
g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT",
|
|
197
|
+
g_ggml_sycl_disable_optimize = get_sycl_env("GGML_SYCL_DISABLE_OPT", 0);
|
|
199
198
|
g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1);
|
|
200
199
|
g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0);
|
|
201
200
|
g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0);
|
|
@@ -1697,7 +1696,7 @@ static void diag_mask_inf_f32(const float * x, float * dst, const int ncols, con
|
|
|
1697
1696
|
dst[i] = x[i] - (col > n_past + row % rows_per_channel) * FLT_MAX;
|
|
1698
1697
|
}
|
|
1699
1698
|
|
|
1700
|
-
static void scale_f32(const float * x, float * dst, const float scale, const int k,
|
|
1699
|
+
static void scale_f32(const float * x, float * dst, const float scale, const float bias, const int k,
|
|
1701
1700
|
const sycl::nd_item<3> &item_ct1) {
|
|
1702
1701
|
const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
|
|
1703
1702
|
item_ct1.get_local_id(2);
|
|
@@ -1706,7 +1705,7 @@ static void scale_f32(const float * x, float * dst, const float scale, const int
|
|
|
1706
1705
|
return;
|
|
1707
1706
|
}
|
|
1708
1707
|
|
|
1709
|
-
dst[i] = scale * x[i];
|
|
1708
|
+
dst[i] = scale * x[i] + bias;
|
|
1710
1709
|
}
|
|
1711
1710
|
|
|
1712
1711
|
|
|
@@ -1844,7 +1843,7 @@ static void ggml_mul_mat_vec_nc_f16_f32_sycl(
|
|
|
1844
1843
|
|
|
1845
1844
|
|
|
1846
1845
|
|
|
1847
|
-
static void scale_f32_sycl(const float *x, float *dst, const float scale,
|
|
1846
|
+
static void scale_f32_sycl(const float *x, float *dst, const float scale, const float bias,
|
|
1848
1847
|
const int k, queue_ptr stream) {
|
|
1849
1848
|
const int num_blocks = (k + SYCL_SCALE_BLOCK_SIZE - 1) / SYCL_SCALE_BLOCK_SIZE;
|
|
1850
1849
|
stream->parallel_for(
|
|
@@ -1852,7 +1851,7 @@ static void scale_f32_sycl(const float *x, float *dst, const float scale,
|
|
|
1852
1851
|
sycl::range<3>(1, 1, SYCL_SCALE_BLOCK_SIZE),
|
|
1853
1852
|
sycl::range<3>(1, 1, SYCL_SCALE_BLOCK_SIZE)),
|
|
1854
1853
|
[=](sycl::nd_item<3> item_ct1) {
|
|
1855
|
-
scale_f32(x, dst, scale, k, item_ct1);
|
|
1854
|
+
scale_f32(x, dst, scale, bias, k, item_ct1);
|
|
1856
1855
|
});
|
|
1857
1856
|
}
|
|
1858
1857
|
|
|
@@ -1887,13 +1886,12 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
|
|
|
1887
1886
|
const size_t shared_mem = ncols_pad * sizeof(int);
|
|
1888
1887
|
|
|
1889
1888
|
if (order == GGML_SORT_ORDER_ASC) {
|
|
1890
|
-
stream
|
|
1889
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
|
1891
1890
|
sycl::local_accessor<uint8_t, 1> dpct_local_acc_ct1(
|
|
1892
1891
|
sycl::range<1>(shared_mem), cgh);
|
|
1893
1892
|
|
|
1894
|
-
|
|
1895
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
1896
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
1893
|
+
sycl_parallel_for(
|
|
1894
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
1897
1895
|
k_argsort_f32_i32<GGML_SORT_ORDER_ASC>(
|
|
1898
1896
|
x, dst, ncols, ncols_pad, item_ct1,
|
|
1899
1897
|
dpct_local_acc_ct1.get_multi_ptr<sycl::access::decorated::no>()
|
|
@@ -1901,13 +1899,12 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
|
|
|
1901
1899
|
});
|
|
1902
1900
|
});
|
|
1903
1901
|
} else if (order == GGML_SORT_ORDER_DESC) {
|
|
1904
|
-
stream
|
|
1902
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
|
1905
1903
|
sycl::local_accessor<uint8_t, 1> dpct_local_acc_ct1(
|
|
1906
1904
|
sycl::range<1>(shared_mem), cgh);
|
|
1907
1905
|
|
|
1908
|
-
|
|
1909
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
1910
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
1906
|
+
sycl_parallel_for(
|
|
1907
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
1911
1908
|
k_argsort_f32_i32<GGML_SORT_ORDER_DESC>(
|
|
1912
1909
|
x, dst, ncols, ncols_pad, item_ct1,
|
|
1913
1910
|
dpct_local_acc_ct1.get_multi_ptr<sycl::access::decorated::no>()
|
|
@@ -1925,50 +1922,47 @@ static void argmax_f32_i32_sycl(const float *x, int *dst, const int ncols,
|
|
|
1925
1922
|
const sycl::range<3> block_nums(1, nrows, 1);
|
|
1926
1923
|
const size_t shared_mem = 256 * sizeof(float);
|
|
1927
1924
|
|
|
1928
|
-
stream
|
|
1925
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
|
1929
1926
|
sycl::local_accessor<float, 1> shared_data(
|
|
1930
1927
|
sycl::range<1>(shared_mem/sizeof(float)), cgh);
|
|
1931
1928
|
sycl::local_accessor<int, 1> shared_indices(
|
|
1932
1929
|
sycl::range<1>(shared_mem/sizeof(float)), cgh);
|
|
1933
1930
|
|
|
1934
|
-
cgh
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
const int tid = item_ct1.get_local_id(2);
|
|
1938
|
-
const int row = item_ct1.get_global_id(1);
|
|
1939
|
-
|
|
1940
|
-
float max_val = -INFINITY;
|
|
1941
|
-
int max_idx = -1;
|
|
1942
|
-
|
|
1943
|
-
for (int col = tid; col < ncols; col += 256) {
|
|
1944
|
-
float val = x[row * ncols + col];
|
|
1945
|
-
if (val > max_val) {
|
|
1946
|
-
max_val = val;
|
|
1947
|
-
max_idx = col;
|
|
1948
|
-
}
|
|
1949
|
-
}
|
|
1931
|
+
sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
1932
|
+
const int tid = item_ct1.get_local_id(2);
|
|
1933
|
+
const int row = item_ct1.get_global_id(1);
|
|
1950
1934
|
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
item_ct1.barrier(sycl::access::fence_space::local_space);
|
|
1935
|
+
float max_val = -INFINITY;
|
|
1936
|
+
int max_idx = -1;
|
|
1954
1937
|
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
shared_data[tid] = val2;
|
|
1961
|
-
shared_indices[tid] = shared_indices[tid + stride];
|
|
1962
|
-
}
|
|
1963
|
-
}
|
|
1964
|
-
item_ct1.barrier(sycl::access::fence_space::local_space);
|
|
1938
|
+
for (int col = tid; col < ncols; col += 256) {
|
|
1939
|
+
float val = x[row * ncols + col];
|
|
1940
|
+
if (val > max_val) {
|
|
1941
|
+
max_val = val;
|
|
1942
|
+
max_idx = col;
|
|
1965
1943
|
}
|
|
1944
|
+
}
|
|
1966
1945
|
|
|
1946
|
+
shared_data[tid] = max_val;
|
|
1947
|
+
shared_indices[tid] = max_idx;
|
|
1948
|
+
item_ct1.barrier(sycl::access::fence_space::local_space);
|
|
1967
1949
|
|
|
1968
|
-
|
|
1969
|
-
|
|
1950
|
+
for (int stride = 256 / 2; stride > 0; stride >>= 1) {
|
|
1951
|
+
if (tid < stride) {
|
|
1952
|
+
float val1 = shared_data[tid];
|
|
1953
|
+
float val2 = shared_data[tid + stride];
|
|
1954
|
+
if (val2 > val1) {
|
|
1955
|
+
shared_data[tid] = val2;
|
|
1956
|
+
shared_indices[tid] = shared_indices[tid + stride];
|
|
1957
|
+
}
|
|
1970
1958
|
}
|
|
1971
|
-
|
|
1959
|
+
item_ct1.barrier(sycl::access::fence_space::local_space);
|
|
1960
|
+
}
|
|
1961
|
+
|
|
1962
|
+
if (tid == 0) {
|
|
1963
|
+
dst[row] = shared_indices[0];
|
|
1964
|
+
}
|
|
1965
|
+
});
|
|
1972
1966
|
});
|
|
1973
1967
|
}
|
|
1974
1968
|
static void diag_mask_inf_f32_sycl(const float *x, float *dst,
|
|
@@ -2326,9 +2320,11 @@ inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * ds
|
|
|
2326
2320
|
float * dst_dd = static_cast<float *>(dst->data);
|
|
2327
2321
|
|
|
2328
2322
|
float scale;
|
|
2329
|
-
|
|
2323
|
+
float bias;
|
|
2324
|
+
memcpy(&scale, (float *) dst->op_params + 0, sizeof(float));
|
|
2325
|
+
memcpy(&bias, (float *) dst->op_params + 1, sizeof(float));
|
|
2330
2326
|
|
|
2331
|
-
scale_f32_sycl(src0_dd, dst_dd, scale, ggml_nelements(dst->src[0]), main_stream);
|
|
2327
|
+
scale_f32_sycl(src0_dd, dst_dd, scale, bias, ggml_nelements(dst->src[0]), main_stream);
|
|
2332
2328
|
/*
|
|
2333
2329
|
DPCT1010:87: SYCL uses exceptions to report errors and does not use the
|
|
2334
2330
|
error codes. The call was replaced with 0. You need to rewrite this code.
|
|
@@ -2952,7 +2948,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons
|
|
|
2952
2948
|
void ** ptrs_dst_get = ptrs_dst.get();
|
|
2953
2949
|
size_t nb12_scaled = src1->type == GGML_TYPE_F16 ? nb12 : s12 * sizeof(sycl::half);
|
|
2954
2950
|
size_t nb13_scaled = src1->type == GGML_TYPE_F16 ? nb13 : s13 * sizeof(sycl::half);
|
|
2955
|
-
cgh
|
|
2951
|
+
sycl_parallel_for(cgh, sycl::nd_range<3>(block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
2956
2952
|
k_compute_batched_ptrs(src0_f16, src1_f16, dst_ddf, ptrs_src_get, ptrs_dst_get, ne12, ne13, ne23, nb02,
|
|
2957
2953
|
nb03, nb12_scaled, nb13_scaled, nbd2, nbd3, r2, r3, item_ct1);
|
|
2958
2954
|
});
|
|
@@ -3456,7 +3452,7 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
|
|
|
3456
3452
|
{
|
|
3457
3453
|
sycl::range<3> block_dims(1, 1, std::min((unsigned int)ne10, 768u));
|
|
3458
3454
|
sycl::range<3> grid_dims(1, n_ids, ids->ne[1]);
|
|
3459
|
-
stream
|
|
3455
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
|
3460
3456
|
sycl::local_accessor<int, 0> src1_row_acc(cgh);
|
|
3461
3457
|
|
|
3462
3458
|
char *__restrict src1_contiguous_get =
|
|
@@ -3468,9 +3464,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
|
|
|
3468
3464
|
size_t ids_nb_ct6 = ids->nb[1];
|
|
3469
3465
|
size_t ids_nb_ct7 = ids->nb[0];
|
|
3470
3466
|
|
|
3471
|
-
|
|
3472
|
-
sycl::nd_range<3>(grid_dims * block_dims, block_dims),
|
|
3473
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
3467
|
+
sycl_parallel_for(
|
|
3468
|
+
cgh, sycl::nd_range<3>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
3474
3469
|
k_copy_src1_to_contiguous(
|
|
3475
3470
|
src1_original, src1_contiguous_get,
|
|
3476
3471
|
dev_cur_src1_row_get,
|
|
@@ -3501,15 +3496,14 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
|
|
|
3501
3496
|
{
|
|
3502
3497
|
sycl::range<3> block_dims(1, 1, std::min((unsigned int)ne0, 768u));
|
|
3503
3498
|
sycl::range<3> grid_dims(1, 1, num_src1_rows);
|
|
3504
|
-
stream
|
|
3499
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
|
3505
3500
|
const char *__restrict dst_contiguous_get =
|
|
3506
3501
|
dst_contiguous.get();
|
|
3507
3502
|
const mmid_row_mapping *__restrict dev_row_mapping_get =
|
|
3508
3503
|
dev_row_mapping.get();
|
|
3509
3504
|
|
|
3510
|
-
|
|
3511
|
-
sycl::nd_range<3>(grid_dims * block_dims, block_dims),
|
|
3512
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
3505
|
+
sycl_parallel_for(
|
|
3506
|
+
cgh, sycl::nd_range<3>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
3513
3507
|
k_copy_dst_from_contiguous(dst_original,
|
|
3514
3508
|
dst_contiguous_get,
|
|
3515
3509
|
dev_row_mapping_get,
|
|
@@ -3612,6 +3606,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
|
|
3612
3606
|
case GGML_OP_GET_ROWS:
|
|
3613
3607
|
ggml_sycl_get_rows(ctx, dst);
|
|
3614
3608
|
break;
|
|
3609
|
+
case GGML_OP_SET_ROWS:
|
|
3610
|
+
ggml_sycl_op_set_rows(ctx, dst);
|
|
3611
|
+
break;
|
|
3615
3612
|
case GGML_OP_DUP:
|
|
3616
3613
|
ggml_sycl_dup(ctx, dst);
|
|
3617
3614
|
break;
|
|
@@ -3685,6 +3682,27 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
|
|
3685
3682
|
return false;
|
|
3686
3683
|
}
|
|
3687
3684
|
break;
|
|
3685
|
+
case GGML_OP_GLU:
|
|
3686
|
+
switch (ggml_get_glu_op(dst)) {
|
|
3687
|
+
case GGML_GLU_OP_REGLU:
|
|
3688
|
+
ggml_sycl_reglu(ctx, dst);
|
|
3689
|
+
break;
|
|
3690
|
+
case GGML_GLU_OP_GEGLU:
|
|
3691
|
+
ggml_sycl_geglu(ctx, dst);
|
|
3692
|
+
break;
|
|
3693
|
+
case GGML_GLU_OP_SWIGLU:
|
|
3694
|
+
ggml_sycl_swiglu(ctx, dst);
|
|
3695
|
+
break;
|
|
3696
|
+
case GGML_GLU_OP_GEGLU_ERF:
|
|
3697
|
+
ggml_sycl_geglu_erf(ctx, dst);
|
|
3698
|
+
break;
|
|
3699
|
+
case GGML_GLU_OP_GEGLU_QUICK:
|
|
3700
|
+
ggml_sycl_geglu_quick(ctx, dst);
|
|
3701
|
+
break;
|
|
3702
|
+
default:
|
|
3703
|
+
return false;
|
|
3704
|
+
}
|
|
3705
|
+
break;
|
|
3688
3706
|
case GGML_OP_NORM:
|
|
3689
3707
|
ggml_sycl_norm(ctx, dst);
|
|
3690
3708
|
break;
|
|
@@ -4221,6 +4239,18 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|
|
4221
4239
|
default:
|
|
4222
4240
|
return false;
|
|
4223
4241
|
}
|
|
4242
|
+
case GGML_OP_GLU:
|
|
4243
|
+
switch (ggml_get_glu_op(op)) {
|
|
4244
|
+
case GGML_GLU_OP_REGLU:
|
|
4245
|
+
case GGML_GLU_OP_GEGLU:
|
|
4246
|
+
case GGML_GLU_OP_SWIGLU:
|
|
4247
|
+
case GGML_GLU_OP_GEGLU_ERF:
|
|
4248
|
+
case GGML_GLU_OP_GEGLU_QUICK:
|
|
4249
|
+
return ggml_is_contiguous_1(op->src[0]);
|
|
4250
|
+
default:
|
|
4251
|
+
return false;
|
|
4252
|
+
}
|
|
4253
|
+
break;
|
|
4224
4254
|
case GGML_OP_MUL_MAT:
|
|
4225
4255
|
case GGML_OP_MUL_MAT_ID:
|
|
4226
4256
|
{
|
|
@@ -4269,6 +4299,12 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|
|
4269
4299
|
return false;
|
|
4270
4300
|
}
|
|
4271
4301
|
}
|
|
4302
|
+
case GGML_OP_SET_ROWS:
|
|
4303
|
+
{
|
|
4304
|
+
// TODO: add support
|
|
4305
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/14274
|
|
4306
|
+
return (op->type == GGML_TYPE_F32 || (op->type == GGML_TYPE_F16 && op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_I64));
|
|
4307
|
+
} break;
|
|
4272
4308
|
case GGML_OP_CPY:
|
|
4273
4309
|
{
|
|
4274
4310
|
ggml_type src0_type = op->src[0]->type;
|
|
@@ -4379,9 +4415,15 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|
|
4379
4415
|
return true;
|
|
4380
4416
|
case GGML_OP_CONT:
|
|
4381
4417
|
return op->src[0]->type != GGML_TYPE_BF16;
|
|
4382
|
-
case GGML_OP_DIAG_MASK_INF:
|
|
4383
4418
|
case GGML_OP_SOFT_MAX:
|
|
4384
|
-
|
|
4419
|
+
// TODO: support batching
|
|
4420
|
+
if (op->src[0]->ne[3] != 1) {
|
|
4421
|
+
return false;
|
|
4422
|
+
}
|
|
4423
|
+
// TODO: support broadcast
|
|
4424
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/14435
|
|
4425
|
+
return !op->src[1] || (op->src[1]->ne[2] == 1 && op->src[1]->ne[3] == 1);
|
|
4426
|
+
case GGML_OP_DIAG_MASK_INF:
|
|
4385
4427
|
case GGML_OP_ROPE:
|
|
4386
4428
|
case GGML_OP_IM2COL:
|
|
4387
4429
|
return true;
|
|
@@ -11,13 +11,13 @@ static void gated_linear_attn_f32_kernel(const dpct::queue_ptr stream, u_int B,
|
|
|
11
11
|
const u_int n_seq_tokens = T / B;
|
|
12
12
|
sycl::range<1> block_dims((C / H));
|
|
13
13
|
sycl::range<1> grid_dims((B * H));
|
|
14
|
-
stream
|
|
14
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
|
15
15
|
/* local memory accessors*/
|
|
16
16
|
auto _k = sycl::local_accessor<float, 1>(sycl::range<1>(head_size), cgh);
|
|
17
17
|
auto _r = sycl::local_accessor<float, 1>(sycl::range<1>(head_size), cgh);
|
|
18
18
|
auto _td = sycl::local_accessor<float, 1>(sycl::range<1>(head_size), cgh);
|
|
19
19
|
|
|
20
|
-
cgh
|
|
20
|
+
sycl_parallel_for<1>(cgh, sycl::nd_range<1>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<1> item) {
|
|
21
21
|
u_int tid = item.get_local_id(0);
|
|
22
22
|
u_int bid = item.get_group(0);
|
|
23
23
|
|
|
@@ -70,7 +70,7 @@ static void im2col_sycl_internal(const float * x, T * dst, int64_t IW, int64_t I
|
|
|
70
70
|
|
|
71
71
|
const int64_t CHW = IC * KH * KW;
|
|
72
72
|
|
|
73
|
-
stream
|
|
73
|
+
sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * local_range, local_range), [=](sycl::nd_item<3> item_ct1) {
|
|
74
74
|
im2col_kernel<T>(x, dst, batch_offset, offset_delta, IC, IW, IH, OH, OW, KW, KH, parallel_elements, CHW, s0, s1,
|
|
75
75
|
p0, p1, d0, d1, item_ct1);
|
|
76
76
|
});
|