@novastera-oss/llamarn 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +4 -2
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
|
@@ -84,6 +84,15 @@ static void gelu_quick(const T *x, T *dst, int k,
|
|
|
84
84
|
dst[i] = x[i] * (static_cast<T>(1.0f) / (static_cast<T>(1.0f) + sycl::native::exp(GELU_QUICK_COEF * x[i])));
|
|
85
85
|
}
|
|
86
86
|
|
|
87
|
+
template<typename T>
|
|
88
|
+
static void gelu_erf(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) {
|
|
89
|
+
const T SQRT_2_INV = static_cast<T>(0.70710678118654752440084436210484f);
|
|
90
|
+
for(auto i = item_ct1.get_global_id(2); i < (const size_t)k; i += item_ct1.get_global_range(2)) {
|
|
91
|
+
auto x_i = x[i];
|
|
92
|
+
dst[i] = static_cast<T>(0.5f) * x_i * (static_cast<T>(1.0f) + sycl::erf(x_i * SQRT_2_INV));
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
87
96
|
template<typename T>
|
|
88
97
|
static void tanh(const T *x, T *dst, int k,
|
|
89
98
|
const sycl::nd_item<3> &item_ct1) {
|
|
@@ -400,6 +409,20 @@ static void gelu_quick_sycl(const T *x, T *dst, const int k,
|
|
|
400
409
|
});
|
|
401
410
|
}
|
|
402
411
|
|
|
412
|
+
|
|
413
|
+
template<typename T>
|
|
414
|
+
static void gelu_erf_sycl(const T *x, T *dst, const int k,
|
|
415
|
+
queue_ptr stream) {
|
|
416
|
+
const int num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE);
|
|
417
|
+
stream->parallel_for(
|
|
418
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) *
|
|
419
|
+
sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE),
|
|
420
|
+
sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)),
|
|
421
|
+
[=](sycl::nd_item<3> item_ct1) {
|
|
422
|
+
gelu_erf(x, dst, k, item_ct1);
|
|
423
|
+
});
|
|
424
|
+
}
|
|
425
|
+
|
|
403
426
|
template<typename T>
|
|
404
427
|
static void tanh_sycl(const T *x, T *dst, const int k,
|
|
405
428
|
queue_ptr stream) {
|
|
@@ -655,7 +678,6 @@ inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
|
|
655
678
|
}
|
|
656
679
|
default:
|
|
657
680
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
658
|
-
break;
|
|
659
681
|
}
|
|
660
682
|
}
|
|
661
683
|
|
|
@@ -688,7 +710,6 @@ inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
|
|
688
710
|
}
|
|
689
711
|
default:
|
|
690
712
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
691
|
-
break;
|
|
692
713
|
}
|
|
693
714
|
}
|
|
694
715
|
|
|
@@ -722,7 +743,6 @@ inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
|
|
722
743
|
}
|
|
723
744
|
default:
|
|
724
745
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
725
|
-
break;
|
|
726
746
|
}
|
|
727
747
|
}
|
|
728
748
|
|
|
@@ -754,7 +774,6 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst
|
|
|
754
774
|
}
|
|
755
775
|
default:
|
|
756
776
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
757
|
-
break;
|
|
758
777
|
}
|
|
759
778
|
}
|
|
760
779
|
|
|
@@ -786,7 +805,6 @@ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst
|
|
|
786
805
|
}
|
|
787
806
|
default:
|
|
788
807
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
789
|
-
break;
|
|
790
808
|
}
|
|
791
809
|
}
|
|
792
810
|
|
|
@@ -818,10 +836,41 @@ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor
|
|
|
818
836
|
}
|
|
819
837
|
default:
|
|
820
838
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
821
|
-
break;
|
|
822
839
|
}
|
|
823
840
|
}
|
|
824
841
|
|
|
842
|
+
inline void ggml_sycl_op_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
|
843
|
+
#if defined (GGML_SYCL_F16)
|
|
844
|
+
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
|
|
845
|
+
GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
|
|
846
|
+
#else
|
|
847
|
+
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
|
848
|
+
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
849
|
+
#endif
|
|
850
|
+
GGML_ASSERT(dst->src[0]->type == dst->type);
|
|
851
|
+
dpct::queue_ptr main_stream = ctx.stream();
|
|
852
|
+
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
|
853
|
+
switch (dst->type) {
|
|
854
|
+
#if defined (GGML_SYCL_F16)
|
|
855
|
+
case GGML_TYPE_F16:
|
|
856
|
+
{
|
|
857
|
+
auto data_pts = cast_data<sycl::half>(dst);
|
|
858
|
+
gelu_erf_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
|
|
859
|
+
break;
|
|
860
|
+
}
|
|
861
|
+
#endif
|
|
862
|
+
case GGML_TYPE_F32:
|
|
863
|
+
{
|
|
864
|
+
auto data_pts = cast_data<float>(dst);
|
|
865
|
+
gelu_erf_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
|
|
866
|
+
break;
|
|
867
|
+
}
|
|
868
|
+
default:
|
|
869
|
+
GGML_ABORT("GGML tensor type not supported!\n");
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
|
|
873
|
+
|
|
825
874
|
inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
826
875
|
#if defined (GGML_SYCL_F16)
|
|
827
876
|
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
|
|
@@ -850,7 +899,6 @@ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst
|
|
|
850
899
|
}
|
|
851
900
|
default:
|
|
852
901
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
853
|
-
break;
|
|
854
902
|
}
|
|
855
903
|
}
|
|
856
904
|
|
|
@@ -883,7 +931,6 @@ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst
|
|
|
883
931
|
}
|
|
884
932
|
default:
|
|
885
933
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
886
|
-
break;
|
|
887
934
|
}
|
|
888
935
|
}
|
|
889
936
|
|
|
@@ -917,7 +964,6 @@ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tenso
|
|
|
917
964
|
}
|
|
918
965
|
default:
|
|
919
966
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
920
|
-
break;
|
|
921
967
|
}
|
|
922
968
|
}
|
|
923
969
|
|
|
@@ -949,7 +995,6 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor
|
|
|
949
995
|
}
|
|
950
996
|
default:
|
|
951
997
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
952
|
-
break;
|
|
953
998
|
}
|
|
954
999
|
}
|
|
955
1000
|
|
|
@@ -981,7 +1026,6 @@ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
|
|
981
1026
|
}
|
|
982
1027
|
default:
|
|
983
1028
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
984
|
-
break;
|
|
985
1029
|
}
|
|
986
1030
|
}
|
|
987
1031
|
|
|
@@ -1013,7 +1057,6 @@ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
|
|
1013
1057
|
}
|
|
1014
1058
|
default:
|
|
1015
1059
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1016
|
-
break;
|
|
1017
1060
|
}
|
|
1018
1061
|
}
|
|
1019
1062
|
|
|
@@ -1045,7 +1088,6 @@ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *
|
|
|
1045
1088
|
}
|
|
1046
1089
|
default:
|
|
1047
1090
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1048
|
-
break;
|
|
1049
1091
|
}
|
|
1050
1092
|
}
|
|
1051
1093
|
|
|
@@ -1078,7 +1120,6 @@ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst
|
|
|
1078
1120
|
}
|
|
1079
1121
|
default:
|
|
1080
1122
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1081
|
-
break;
|
|
1082
1123
|
}
|
|
1083
1124
|
}
|
|
1084
1125
|
|
|
@@ -1110,7 +1151,6 @@ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
|
|
1110
1151
|
}
|
|
1111
1152
|
default:
|
|
1112
1153
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1113
|
-
break;
|
|
1114
1154
|
}
|
|
1115
1155
|
}
|
|
1116
1156
|
|
|
@@ -1142,7 +1182,6 @@ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
|
|
1142
1182
|
}
|
|
1143
1183
|
default:
|
|
1144
1184
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1145
|
-
break;
|
|
1146
1185
|
}
|
|
1147
1186
|
}
|
|
1148
1187
|
|
|
@@ -1174,7 +1213,6 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst
|
|
|
1174
1213
|
}
|
|
1175
1214
|
default:
|
|
1176
1215
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1177
|
-
break;
|
|
1178
1216
|
}
|
|
1179
1217
|
}
|
|
1180
1218
|
|
|
@@ -1206,7 +1244,6 @@ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
|
|
1206
1244
|
}
|
|
1207
1245
|
default:
|
|
1208
1246
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1209
|
-
break;
|
|
1210
1247
|
}
|
|
1211
1248
|
}
|
|
1212
1249
|
|
|
@@ -1241,7 +1278,6 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor
|
|
|
1241
1278
|
}
|
|
1242
1279
|
default:
|
|
1243
1280
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1244
|
-
break;
|
|
1245
1281
|
}
|
|
1246
1282
|
}
|
|
1247
1283
|
|
|
@@ -1273,7 +1309,6 @@ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
|
|
1273
1309
|
}
|
|
1274
1310
|
default:
|
|
1275
1311
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1276
|
-
break;
|
|
1277
1312
|
}
|
|
1278
1313
|
}
|
|
1279
1314
|
|
|
@@ -1315,7 +1350,6 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor *
|
|
|
1315
1350
|
}
|
|
1316
1351
|
default:
|
|
1317
1352
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1318
|
-
break;
|
|
1319
1353
|
}
|
|
1320
1354
|
}
|
|
1321
1355
|
|
|
@@ -1350,7 +1384,6 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
|
|
1350
1384
|
}
|
|
1351
1385
|
default:
|
|
1352
1386
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1353
|
-
break;
|
|
1354
1387
|
}
|
|
1355
1388
|
}
|
|
1356
1389
|
|
|
@@ -1388,7 +1421,6 @@ inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * ds
|
|
|
1388
1421
|
}
|
|
1389
1422
|
default:
|
|
1390
1423
|
GGML_ABORT("GGML tensor type not supported!\n");
|
|
1391
|
-
break;
|
|
1392
1424
|
}
|
|
1393
1425
|
}
|
|
1394
1426
|
|
|
@@ -1414,146 +1446,126 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst)
|
|
|
1414
1446
|
|
|
1415
1447
|
|
|
1416
1448
|
void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1417
|
-
|
|
1449
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1418
1450
|
ggml_sycl_op_sqrt(ctx, dst);
|
|
1419
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1420
1451
|
}
|
|
1421
1452
|
|
|
1422
1453
|
void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1423
|
-
|
|
1454
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1424
1455
|
ggml_sycl_op_sin(ctx, dst);
|
|
1425
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1426
1456
|
}
|
|
1427
1457
|
|
|
1428
1458
|
void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1429
|
-
|
|
1459
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1430
1460
|
ggml_sycl_op_cos(ctx, dst);
|
|
1431
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1432
1461
|
}
|
|
1433
1462
|
|
|
1434
1463
|
void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1435
|
-
|
|
1464
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
|
|
1436
1465
|
ggml_sycl_op_acc(ctx, dst);
|
|
1437
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1438
1466
|
}
|
|
1439
1467
|
|
|
1440
1468
|
void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1441
|
-
|
|
1469
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1442
1470
|
ggml_sycl_op_gelu(ctx, dst);
|
|
1443
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1444
1471
|
}
|
|
1445
1472
|
|
|
1446
1473
|
void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1447
|
-
|
|
1474
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1448
1475
|
ggml_sycl_op_silu(ctx, dst);
|
|
1449
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1450
1476
|
}
|
|
1451
1477
|
|
|
1452
1478
|
void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1453
|
-
|
|
1479
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1454
1480
|
ggml_sycl_op_gelu_quick(ctx, dst);
|
|
1455
|
-
|
|
1481
|
+
}
|
|
1482
|
+
|
|
1483
|
+
void ggml_sycl_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1484
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1485
|
+
ggml_sycl_op_gelu_erf(ctx, dst);
|
|
1456
1486
|
}
|
|
1457
1487
|
|
|
1458
1488
|
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1459
|
-
|
|
1489
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1460
1490
|
ggml_sycl_op_tanh(ctx, dst);
|
|
1461
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1462
1491
|
}
|
|
1463
1492
|
|
|
1464
1493
|
void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1465
|
-
|
|
1494
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1466
1495
|
ggml_sycl_op_relu(ctx, dst);
|
|
1467
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1468
1496
|
}
|
|
1469
1497
|
|
|
1470
1498
|
void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1471
|
-
|
|
1499
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1472
1500
|
ggml_sycl_op_sigmoid(ctx, dst);
|
|
1473
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1474
1501
|
}
|
|
1475
1502
|
|
|
1476
1503
|
void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1477
|
-
|
|
1504
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1478
1505
|
ggml_sycl_op_hardsigmoid(ctx, dst);
|
|
1479
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1480
1506
|
}
|
|
1481
1507
|
|
|
1482
1508
|
void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1483
|
-
|
|
1509
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1484
1510
|
ggml_sycl_op_hardswish(ctx, dst);
|
|
1485
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1486
1511
|
}
|
|
1487
1512
|
|
|
1488
|
-
|
|
1489
1513
|
void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1490
|
-
|
|
1514
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1491
1515
|
ggml_sycl_op_exp(ctx, dst);
|
|
1492
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1493
1516
|
}
|
|
1494
1517
|
|
|
1495
1518
|
void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1496
|
-
|
|
1519
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1497
1520
|
ggml_sycl_op_log(ctx, dst);
|
|
1498
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1499
1521
|
}
|
|
1500
1522
|
|
|
1501
1523
|
void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1502
|
-
|
|
1524
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1503
1525
|
ggml_sycl_op_neg(ctx, dst);
|
|
1504
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1505
1526
|
}
|
|
1506
1527
|
|
|
1507
1528
|
void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1508
|
-
|
|
1529
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1509
1530
|
ggml_sycl_op_step(ctx, dst);
|
|
1510
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1511
1531
|
}
|
|
1512
1532
|
|
|
1513
1533
|
void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1514
|
-
|
|
1534
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1515
1535
|
ggml_sycl_op_leaky_relu(ctx, dst);
|
|
1516
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1517
1536
|
}
|
|
1518
1537
|
|
|
1519
1538
|
void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1520
|
-
|
|
1539
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1521
1540
|
ggml_sycl_op_sqr(ctx, dst);
|
|
1522
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1523
1541
|
}
|
|
1524
1542
|
|
|
1525
1543
|
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1526
|
-
|
|
1544
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1527
1545
|
ggml_sycl_op_upscale(ctx, dst);
|
|
1528
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1529
1546
|
}
|
|
1530
1547
|
|
|
1531
1548
|
void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1532
|
-
|
|
1549
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1533
1550
|
ggml_sycl_op_pad(ctx, dst);
|
|
1534
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1535
1551
|
}
|
|
1536
1552
|
|
|
1537
1553
|
void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1538
|
-
|
|
1554
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1539
1555
|
ggml_sycl_op_clamp(ctx, dst);
|
|
1540
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1541
1556
|
}
|
|
1542
1557
|
|
|
1543
1558
|
void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1544
|
-
|
|
1559
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1545
1560
|
ggml_sycl_op_sgn(ctx, dst);
|
|
1546
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1547
1561
|
}
|
|
1548
1562
|
|
|
1549
1563
|
void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1550
|
-
|
|
1564
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1551
1565
|
ggml_sycl_op_abs(ctx, dst);
|
|
1552
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1553
1566
|
}
|
|
1554
1567
|
|
|
1555
1568
|
void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1556
|
-
|
|
1569
|
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
|
1557
1570
|
ggml_sycl_op_elu(ctx, dst);
|
|
1558
|
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1559
1571
|
}
|
|
@@ -38,6 +38,8 @@ void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
|
38
38
|
|
|
39
39
|
void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
40
40
|
|
|
41
|
+
void ggml_sycl_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
42
|
+
|
|
41
43
|
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
42
44
|
|
|
43
45
|
void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
@@ -32,16 +32,36 @@ public:
|
|
|
32
32
|
else static_assert(0);
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
// matrix A has m rows, k columns
|
|
36
|
+
// matrix B has k rows, n columns
|
|
37
|
+
// nra - number of elements to skip when moving into next row in A
|
|
38
|
+
// nrb - number of elements to skip when moving into next row in B
|
|
39
|
+
// nca - number of elements to skip when moving into next column in A
|
|
40
|
+
// ncb - number of elements to skip when moving into next column in B
|
|
41
|
+
// stride_a - number of elements to skip when moving to next A matrix
|
|
42
|
+
// stride_b - number of elements to skip when moving to next B matrix
|
|
43
|
+
// batches_a - number of A matrices
|
|
44
|
+
// batches_b - number of B matrices
|
|
45
|
+
static void gemm(ggml_backend_sycl_context & ctx, int m, int n, int k,
|
|
46
|
+
const void * a, dt at, dnnl_dim_t nra, dnnl_dim_t nca, dnnl_dim_t stride_a,
|
|
47
|
+
const void * b, dt bt, dnnl_dim_t nrb, dnnl_dim_t ncb, dnnl_dim_t stride_b,
|
|
48
|
+
void * c, dt ct, const queue_ptr & q, dnnl_dim_t batches_a, dnnl_dim_t batches_b) {
|
|
49
|
+
|
|
37
50
|
auto stream = ctx.stream_dnnl(q);
|
|
38
51
|
auto eng = ctx.engine_dnnl(q);
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
dnnl::memory::dims
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
52
|
+
|
|
53
|
+
// { # strides, # rows, # columns }
|
|
54
|
+
dnnl::memory::dims a_dims = { batches_a, m, k };
|
|
55
|
+
dnnl::memory::dims b_dims = { batches_b, k, n };
|
|
56
|
+
dnnl::memory::dims c_dims = { std::max(batches_a, batches_b), m, n };
|
|
57
|
+
|
|
58
|
+
// { # elements to skip to next stride, # elements to skip to next row, # elements to skip to next column }
|
|
59
|
+
dnnl::memory::dims a_strides = { stride_a, nra, nca };
|
|
60
|
+
dnnl::memory::dims b_strides = { stride_b, nrb, ncb };
|
|
61
|
+
|
|
62
|
+
const auto a_in_md = dnnl::memory::desc(a_dims, at, a_strides);
|
|
63
|
+
const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_strides);
|
|
64
|
+
const auto c_md = dnnl::memory::desc(c_dims, ct, tag::abc);
|
|
45
65
|
|
|
46
66
|
dnnl::primitive_attr primitive_attr;
|
|
47
67
|
primitive_attr.set_scratchpad_mode(dnnl::scratchpad_mode::user);
|
|
@@ -63,6 +83,15 @@ public:
|
|
|
63
83
|
|
|
64
84
|
matmul_prim.execute(stream, matmul_args);
|
|
65
85
|
}
|
|
86
|
+
|
|
87
|
+
// matrices A and B are column major, both having k rows
|
|
88
|
+
// matrix A has m column, matrix B has n columns
|
|
89
|
+
// output: column major matrix C = A transposed * B
|
|
90
|
+
static void row_gemm(ggml_backend_sycl_context & ctx, int m, int n, int k,
|
|
91
|
+
const void * a, dt at, const void * b, dt bt, void * c, dt ct, const queue_ptr & q) {
|
|
92
|
+
|
|
93
|
+
gemm(ctx, m, n, k, a, at, k, 1, k * m, b, bt, 1, k, n * k, c, ct, q, 1, 1);
|
|
94
|
+
}
|
|
66
95
|
};
|
|
67
96
|
|
|
68
97
|
#endif
|
|
@@ -257,8 +257,7 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens
|
|
|
257
257
|
GGML_UNUSED(ctx);
|
|
258
258
|
}
|
|
259
259
|
|
|
260
|
-
void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
|
261
|
-
|
|
260
|
+
void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
262
261
|
GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32);
|
|
263
262
|
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
264
263
|
|
|
@@ -308,4 +307,3 @@ void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
|
|
308
307
|
GGML_ABORT("fatal error");
|
|
309
308
|
}
|
|
310
309
|
}
|
|
311
|
-
|