@novastera-oss/llamarn 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +8 -3
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +56 -22
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +1 -2
- package/cpp/llama.cpp/README.md +4 -5
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +24 -0
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +3 -0
- package/cpp/llama.cpp/common/common.h +5 -0
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/convert_hf_to_gguf.py +860 -23
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
- package/cpp/llama.cpp/ggml/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +206 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -1
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +37 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +111 -103
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1405 -240
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +212 -34
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +35 -11
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +187 -54
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +71 -29
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +12 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +269 -110
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +125 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +51 -9
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +394 -80
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +616 -239
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +741 -571
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +697 -1098
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +104 -62
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +39 -38
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +767 -292
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
- package/cpp/llama.cpp/ggml/src/ggml.c +449 -72
- package/cpp/llama.cpp/ggml/src/gguf.cpp +13 -2
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +285 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +27 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +137 -21
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +109 -7
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +8 -43
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +265 -3
- package/cpp/llama.cpp/src/llama-arch.h +36 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +596 -359
- package/cpp/llama.cpp/src/llama-batch.h +105 -70
- package/cpp/llama.cpp/src/llama-chat.cpp +26 -6
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +101 -107
- package/cpp/llama.cpp/src/llama-context.h +13 -13
- package/cpp/llama.cpp/src/llama-graph.cpp +286 -404
- package/cpp/llama.cpp/src/llama-graph.h +78 -79
- package/cpp/llama.cpp/src/llama-hparams.cpp +11 -1
- package/cpp/llama.cpp/src/llama-hparams.h +11 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +74 -66
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +23 -26
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +312 -157
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +79 -46
- package/cpp/llama.cpp/src/llama-kv-cells.h +97 -21
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +73 -69
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +19 -22
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +88 -77
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +21 -22
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +5301 -2922
- package/cpp/llama.cpp/src/llama-model.h +40 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +88 -5
- package/cpp/llama.cpp/src/llama-vocab.cpp +37 -3
- package/cpp/llama.cpp/src/llama-vocab.h +42 -0
- package/cpp/rn-utils.h +3 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +5 -0
- package/ios/include/llama.h +8 -43
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3744
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5095 -4900
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4871
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3773
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -22,32 +22,45 @@
|
|
|
22
22
|
REQD_SUBGROUP_SIZE_64
|
|
23
23
|
#endif
|
|
24
24
|
kernel void kernel_soft_max_4(
|
|
25
|
-
global
|
|
25
|
+
global char * src0,
|
|
26
26
|
ulong offset0,
|
|
27
|
-
global
|
|
27
|
+
global char * src1,
|
|
28
28
|
ulong offset1,
|
|
29
|
-
global
|
|
29
|
+
global char * dst,
|
|
30
30
|
ulong offsetd,
|
|
31
31
|
int ne00,
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
ulong nb01,
|
|
33
|
+
ulong nb02,
|
|
34
|
+
ulong nb03,
|
|
35
|
+
int ne12,
|
|
36
|
+
int ne13,
|
|
37
|
+
ulong nb11,
|
|
38
|
+
ulong nb12,
|
|
39
|
+
ulong nb13,
|
|
40
|
+
ulong nb1,
|
|
41
|
+
ulong nb2,
|
|
42
|
+
ulong nb3,
|
|
34
43
|
float scale,
|
|
35
44
|
float max_bias,
|
|
36
45
|
float m0,
|
|
37
46
|
float m1,
|
|
38
47
|
int n_head_log2
|
|
39
48
|
) {
|
|
40
|
-
src0 =
|
|
41
|
-
src1 =
|
|
42
|
-
dst
|
|
49
|
+
src0 = src0 + offset0;
|
|
50
|
+
src1 = src1 + offset1;
|
|
51
|
+
dst = dst + offsetd;
|
|
43
52
|
|
|
44
53
|
int i03 = get_group_id(2);
|
|
45
54
|
int i02 = get_group_id(1);
|
|
46
55
|
int i01 = get_group_id(0);
|
|
47
56
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
57
|
+
int i13 = i03%ne13;
|
|
58
|
+
int i12 = i02%ne12;
|
|
59
|
+
int i11 = i01;
|
|
60
|
+
|
|
61
|
+
global float4 * psrc4 = (global float4 *)(src0 + i01*nb01 + i02*nb02 + i03*nb03);
|
|
62
|
+
global float4 * pmask = src1 != src0 ? (global float4 *)(src1 + i11*nb11 + i12*nb12 + i13*nb13) : 0;
|
|
63
|
+
global float4 * pdst4 = (global float4 *)(dst + i01*nb1 + i02*nb2 + i03*nb3);
|
|
51
64
|
|
|
52
65
|
float slope = 1.0f;
|
|
53
66
|
|
|
@@ -22,32 +22,45 @@
|
|
|
22
22
|
REQD_SUBGROUP_SIZE_64
|
|
23
23
|
#endif
|
|
24
24
|
kernel void kernel_soft_max_f16(
|
|
25
|
-
global
|
|
25
|
+
global char * src0,
|
|
26
26
|
ulong offset0,
|
|
27
|
-
global
|
|
27
|
+
global char * src1,
|
|
28
28
|
ulong offset1,
|
|
29
|
-
global
|
|
29
|
+
global char * dst,
|
|
30
30
|
ulong offsetd,
|
|
31
31
|
int ne00,
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
ulong nb01,
|
|
33
|
+
ulong nb02,
|
|
34
|
+
ulong nb03,
|
|
35
|
+
int ne12,
|
|
36
|
+
int ne13,
|
|
37
|
+
ulong nb11,
|
|
38
|
+
ulong nb12,
|
|
39
|
+
ulong nb13,
|
|
40
|
+
ulong nb1,
|
|
41
|
+
ulong nb2,
|
|
42
|
+
ulong nb3,
|
|
34
43
|
float scale,
|
|
35
44
|
float max_bias,
|
|
36
45
|
float m0,
|
|
37
46
|
float m1,
|
|
38
47
|
int n_head_log2
|
|
39
48
|
) {
|
|
40
|
-
src0 =
|
|
41
|
-
src1 =
|
|
42
|
-
dst
|
|
49
|
+
src0 = src0 + offset0;
|
|
50
|
+
src1 = src1 + offset1;
|
|
51
|
+
dst = dst + offsetd;
|
|
43
52
|
|
|
44
53
|
int i03 = get_group_id(2);
|
|
45
54
|
int i02 = get_group_id(1);
|
|
46
55
|
int i01 = get_group_id(0);
|
|
47
56
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
57
|
+
int i13 = i03%ne13;
|
|
58
|
+
int i12 = i02%ne12;
|
|
59
|
+
int i11 = i01;
|
|
60
|
+
|
|
61
|
+
global float * psrc0 = (global float *)(src0 + i01*nb01 + i02*nb02 + i03*nb03);
|
|
62
|
+
global half * pmask = src1 != src0 ? (global half *)(src1 + i11*nb11 + i12*nb12 + i13*nb13) : 0;
|
|
63
|
+
global float * pdst = (global float *)(dst + i01*nb1 + i02*nb2 + i03*nb3);
|
|
51
64
|
|
|
52
65
|
float slope = 1.0f;
|
|
53
66
|
|
|
@@ -22,32 +22,45 @@
|
|
|
22
22
|
REQD_SUBGROUP_SIZE_64
|
|
23
23
|
#endif
|
|
24
24
|
kernel void kernel_soft_max(
|
|
25
|
-
global
|
|
25
|
+
global char * src0,
|
|
26
26
|
ulong offset0,
|
|
27
|
-
global
|
|
27
|
+
global char * src1,
|
|
28
28
|
ulong offset1,
|
|
29
|
-
global
|
|
29
|
+
global char * dst,
|
|
30
30
|
ulong offsetd,
|
|
31
31
|
int ne00,
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
ulong nb01,
|
|
33
|
+
ulong nb02,
|
|
34
|
+
ulong nb03,
|
|
35
|
+
int ne12,
|
|
36
|
+
int ne13,
|
|
37
|
+
ulong nb11,
|
|
38
|
+
ulong nb12,
|
|
39
|
+
ulong nb13,
|
|
40
|
+
ulong nb1,
|
|
41
|
+
ulong nb2,
|
|
42
|
+
ulong nb3,
|
|
34
43
|
float scale,
|
|
35
44
|
float max_bias,
|
|
36
45
|
float m0,
|
|
37
46
|
float m1,
|
|
38
47
|
int n_head_log2
|
|
39
48
|
) {
|
|
40
|
-
src0 =
|
|
41
|
-
src1 =
|
|
42
|
-
dst
|
|
49
|
+
src0 = src0 + offset0;
|
|
50
|
+
src1 = src1 + offset1;
|
|
51
|
+
dst = dst + offsetd;
|
|
43
52
|
|
|
44
53
|
int i03 = get_group_id(2);
|
|
45
54
|
int i02 = get_group_id(1);
|
|
46
55
|
int i01 = get_group_id(0);
|
|
47
56
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
57
|
+
int i13 = i03%ne13;
|
|
58
|
+
int i12 = i02%ne12;
|
|
59
|
+
int i11 = i01;
|
|
60
|
+
|
|
61
|
+
global float * psrc0 = (global float *)(src0 + i01*nb01 + i02*nb02 + i03*nb03);
|
|
62
|
+
global float * pmask = src1 != src0 ? (global float *)(src1 + i11*nb11 + i12*nb12 + i13*nb13) : 0;
|
|
63
|
+
global float * pdst = (global float *)(dst + i01*nb1 + i02*nb2 + i03*nb3);
|
|
51
64
|
|
|
52
65
|
float slope = 1.0f;
|
|
53
66
|
|
|
@@ -60,7 +60,8 @@ kernel void kernel_upscale_bilinear(
|
|
|
60
60
|
float sf0,
|
|
61
61
|
float sf1,
|
|
62
62
|
float sf2,
|
|
63
|
-
float sf3
|
|
63
|
+
float sf3,
|
|
64
|
+
float pixel_offset
|
|
64
65
|
) {
|
|
65
66
|
global const char * src_base = (global const char *)p_src0 + off_src0;
|
|
66
67
|
global float * dst_base = (global float *)((global char *)p_dst + off_dst);
|
|
@@ -80,8 +81,6 @@ kernel void kernel_upscale_bilinear(
|
|
|
80
81
|
int i02_src = (int)(i12_dst / sf2);
|
|
81
82
|
int i03_src = (int)(i13_dst / sf3);
|
|
82
83
|
|
|
83
|
-
const float pixel_offset = 0.5f;
|
|
84
|
-
|
|
85
84
|
float y_src_f = ((float)i11_dst + pixel_offset) / sf1 - pixel_offset;
|
|
86
85
|
long y0_src = (long)floor(y_src_f);
|
|
87
86
|
long y1_src = y0_src + 1;
|
|
@@ -568,14 +568,14 @@ static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, co
|
|
|
568
568
|
}
|
|
569
569
|
float iscale = nmax/(max - min);
|
|
570
570
|
float scale = 1/iscale;
|
|
571
|
-
float
|
|
571
|
+
float best_error = 0;
|
|
572
572
|
for (int i = 0; i < n; ++i) {
|
|
573
573
|
int l = nearest_int(iscale*(x[i] - min));
|
|
574
574
|
L[i] = MAX(0, MIN(nmax, l));
|
|
575
575
|
float diff = scale * L[i] + min - x[i];
|
|
576
576
|
diff = use_mad ? fabsf(diff) : diff * diff;
|
|
577
577
|
float w = weights[i];
|
|
578
|
-
|
|
578
|
+
best_error += w * diff;
|
|
579
579
|
}
|
|
580
580
|
if (nstep < 1) {
|
|
581
581
|
*the_min = -min;
|
|
@@ -601,18 +601,18 @@ static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, co
|
|
|
601
601
|
this_min = 0;
|
|
602
602
|
this_scale = sum_xl / sum_l2;
|
|
603
603
|
}
|
|
604
|
-
float
|
|
604
|
+
float cur_error = 0;
|
|
605
605
|
for (int i = 0; i < n; ++i) {
|
|
606
606
|
float diff = this_scale * Laux[i] + this_min - x[i];
|
|
607
607
|
diff = use_mad ? fabsf(diff) : diff * diff;
|
|
608
608
|
float w = weights[i];
|
|
609
|
-
|
|
609
|
+
cur_error += w * diff;
|
|
610
610
|
}
|
|
611
|
-
if (
|
|
611
|
+
if (cur_error < best_error) {
|
|
612
612
|
for (int i = 0; i < n; ++i) {
|
|
613
613
|
L[i] = Laux[i];
|
|
614
614
|
}
|
|
615
|
-
|
|
615
|
+
best_error = cur_error;
|
|
616
616
|
scale = this_scale;
|
|
617
617
|
min = this_min;
|
|
618
618
|
}
|
|
@@ -225,9 +225,9 @@ struct bin_bcast_sycl {
|
|
|
225
225
|
dpct::has_capability_or_fail(stream->get_device(),
|
|
226
226
|
{sycl::aspect::fp16});
|
|
227
227
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
228
|
+
sycl_parallel_for(
|
|
229
|
+
stream,
|
|
230
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, block_num) * sycl::range<3>(1, 1, block_size),
|
|
231
231
|
sycl::range<3>(1, 1, block_size)),
|
|
232
232
|
[=](sycl::nd_item<3> item_ct1) {
|
|
233
233
|
k_bin_bcast_unravel<bin_op>(
|
|
@@ -246,9 +246,8 @@ struct bin_bcast_sycl {
|
|
|
246
246
|
dpct::has_capability_or_fail(stream->get_device(),
|
|
247
247
|
{sycl::aspect::fp16});
|
|
248
248
|
|
|
249
|
-
|
|
250
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
251
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
249
|
+
sycl_parallel_for(
|
|
250
|
+
stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
252
251
|
k_bin_bcast<bin_op>(src0_dd, src1_dd, dst_dd, ne0, ne1,
|
|
253
252
|
ne2, ne3, ne10, ne11, ne12, ne13,
|
|
254
253
|
s1, s2, s3, s01, s02, s03, s11, s12, s13,
|
|
@@ -199,7 +199,7 @@ struct sycl_device_info {
|
|
|
199
199
|
// size_t smpb; // max. shared memory per block
|
|
200
200
|
bool vmm; // virtual memory support
|
|
201
201
|
size_t total_vram;
|
|
202
|
-
sycl_hw_info hw_info;
|
|
202
|
+
//sycl_hw_info hw_info; \\ device id and aarch, currently not used
|
|
203
203
|
optimize_feature opt_feature;
|
|
204
204
|
};
|
|
205
205
|
|
|
@@ -286,29 +286,6 @@ struct ggml_tensor_extra_gpu {
|
|
|
286
286
|
|
|
287
287
|
void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams={});
|
|
288
288
|
|
|
289
|
-
inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
|
|
290
|
-
optimize_feature opt;
|
|
291
|
-
|
|
292
|
-
opt.reorder =
|
|
293
|
-
(arch == syclex::architecture::intel_gpu_dg1 ||
|
|
294
|
-
arch == syclex::architecture::intel_gpu_acm_g10 ||
|
|
295
|
-
arch == syclex::architecture::intel_gpu_acm_g11 ||
|
|
296
|
-
arch == syclex::architecture::intel_gpu_acm_g12 ||
|
|
297
|
-
arch == syclex::architecture::intel_gpu_pvc ||
|
|
298
|
-
arch == syclex::architecture::intel_gpu_pvc_vg ||
|
|
299
|
-
arch == syclex::architecture::intel_gpu_mtl_u ||
|
|
300
|
-
arch == syclex::architecture::intel_gpu_mtl_s ||
|
|
301
|
-
arch == syclex::architecture::intel_gpu_mtl_h ||
|
|
302
|
-
arch == syclex::architecture::intel_gpu_arl_u ||
|
|
303
|
-
arch == syclex::architecture::intel_gpu_arl_s ||
|
|
304
|
-
arch == syclex::architecture::intel_gpu_arl_h ||
|
|
305
|
-
arch == syclex::architecture::intel_gpu_bmg_g21 ||
|
|
306
|
-
arch == syclex::architecture::intel_gpu_lnl_m
|
|
307
|
-
);
|
|
308
|
-
|
|
309
|
-
return opt;
|
|
310
|
-
}
|
|
311
|
-
|
|
312
289
|
namespace sycl_ex = sycl::ext::oneapi::experimental;
|
|
313
290
|
struct ggml_backend_sycl_context {
|
|
314
291
|
int device;
|
|
@@ -89,33 +89,24 @@ static void concat_f32_sycl(const float *x, const float *y, float *dst,
|
|
|
89
89
|
sycl::range<3> gridDim(ne2, ne1, num_blocks);
|
|
90
90
|
switch (dim) {
|
|
91
91
|
case 0:
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
concat_f32_dim0(x, y, dst, ne0, ne00, item_ct1);
|
|
98
|
-
});
|
|
99
|
-
break;
|
|
92
|
+
sycl_parallel_for(stream,
|
|
93
|
+
sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
|
|
94
|
+
sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
|
|
95
|
+
[=](sycl::nd_item<3> item_ct1) { concat_f32_dim0(x, y, dst, ne0, ne00, item_ct1); });
|
|
96
|
+
break;
|
|
100
97
|
case 1:
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
concat_f32_dim1(x, y, dst, ne0, ne01, item_ct1);
|
|
107
|
-
});
|
|
108
|
-
break;
|
|
98
|
+
sycl_parallel_for(stream,
|
|
99
|
+
sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
|
|
100
|
+
sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
|
|
101
|
+
[=](sycl::nd_item<3> item_ct1) { concat_f32_dim1(x, y, dst, ne0, ne01, item_ct1); });
|
|
102
|
+
break;
|
|
109
103
|
// dim >=2 will be dispatched to the default path
|
|
110
104
|
default:
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
concat_f32_dim2(x, y, dst, ne0, ne02, item_ct1);
|
|
117
|
-
});
|
|
118
|
-
break;
|
|
105
|
+
sycl_parallel_for(stream,
|
|
106
|
+
sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
|
|
107
|
+
sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
|
|
108
|
+
[=](sycl::nd_item<3> item_ct1) { concat_f32_dim2(x, y, dst, ne0, ne02, item_ct1); });
|
|
109
|
+
break;
|
|
119
110
|
}
|
|
120
111
|
}
|
|
121
112
|
|
|
@@ -129,33 +120,29 @@ static void concat_f32_sycl_non_cont(
|
|
|
129
120
|
int64_t ne2, int64_t ne3, uint64_t nb0, uint64_t nb1, uint64_t nb2,
|
|
130
121
|
uint64_t nb3, int32_t dim) {
|
|
131
122
|
sycl::range<3> gridDim(ne3, ne2, ne1);
|
|
132
|
-
stream
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
int64_t i2 = item_ct1.get_group(1);
|
|
137
|
-
int64_t i1 = item_ct1.get_group(2);
|
|
123
|
+
sycl_parallel_for(stream, sycl::nd_range<3>(gridDim, sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) {
|
|
124
|
+
int64_t i3 = item_ct1.get_group(0);
|
|
125
|
+
int64_t i2 = item_ct1.get_group(1);
|
|
126
|
+
int64_t i1 = item_ct1.get_group(2);
|
|
138
127
|
|
|
139
|
-
|
|
140
|
-
|
|
128
|
+
int64_t o[4] = { 0, 0, 0, 0 };
|
|
129
|
+
o[dim] = dim == 0 ? ne00 : (dim == 1 ? ne01 : (dim == 2 ? ne02 : ne03));
|
|
141
130
|
|
|
142
|
-
|
|
131
|
+
const float * x;
|
|
143
132
|
|
|
144
|
-
|
|
145
|
-
i0 += item_ct1.get_local_range(2)) {
|
|
133
|
+
for (int i0 = item_ct1.get_local_id(2); i0 < ne0; i0 += item_ct1.get_local_range(2)) {
|
|
146
134
|
if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
|
|
147
|
-
|
|
148
|
-
(i0)*nb00);
|
|
135
|
+
x = (const float *) (src0 + (i3) *nb03 + (i2) *nb02 + (i1) *nb01 + (i0) *nb00);
|
|
149
136
|
} else {
|
|
150
|
-
|
|
151
|
-
|
|
137
|
+
x = (const float *) (src1 + (i3 - o[3]) * nb13 + (i2 - o[2]) * nb12 + (i1 - o[1]) * nb11 +
|
|
138
|
+
(i0 - o[0]) * nb10);
|
|
152
139
|
}
|
|
153
140
|
|
|
154
141
|
float *y = (float *)(dst + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0);
|
|
155
142
|
|
|
156
143
|
*y = *x;
|
|
157
|
-
|
|
158
|
-
|
|
144
|
+
}
|
|
145
|
+
});
|
|
159
146
|
}
|
|
160
147
|
|
|
161
148
|
void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
|
@@ -59,16 +59,10 @@ static void conv_transpose_1d_f32_f32_sycl(
|
|
|
59
59
|
const int num_blocks = (output_size + SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE - 1) / SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE;
|
|
60
60
|
const sycl::range<3> block_dims(1, 1, SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE);
|
|
61
61
|
const sycl::range<3> block_nums(1, 1, num_blocks);
|
|
62
|
-
stream
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
conv_transpose_1d_kernel(
|
|
67
|
-
s0, output_size,
|
|
68
|
-
src0_ne0, src0_ne1, src0_ne2,
|
|
69
|
-
src1_ne0, dst_ne0,
|
|
70
|
-
src0, src1, dst, item_ct1);
|
|
71
|
-
});
|
|
62
|
+
sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
|
63
|
+
conv_transpose_1d_kernel(s0, output_size, src0_ne0, src0_ne1, src0_ne2, src1_ne0, dst_ne0, src0, src1, dst,
|
|
64
|
+
item_ct1);
|
|
65
|
+
});
|
|
72
66
|
}
|
|
73
67
|
|
|
74
68
|
void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|