@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
|
@@ -367,7 +367,7 @@ static void l2_norm_f32_sycl(const float* x, float* dst, const int ncols,
|
|
|
367
367
|
sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims,
|
|
368
368
|
block_dims),
|
|
369
369
|
[=](sycl::nd_item<3> item_ct1)
|
|
370
|
-
[[
|
|
370
|
+
[[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
|
371
371
|
l2_norm_f32(x, dst, ncols, eps, item_ct1,
|
|
372
372
|
nullptr, WARP_SIZE);
|
|
373
373
|
});
|
|
@@ -389,7 +389,7 @@ static void l2_norm_f32_sycl(const float* x, float* dst, const int ncols,
|
|
|
389
389
|
sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims,
|
|
390
390
|
block_dims),
|
|
391
391
|
[=](sycl::nd_item<3> item_ct1)
|
|
392
|
-
[[
|
|
392
|
+
[[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
|
393
393
|
l2_norm_f32(x, dst, ncols, eps, item_ct1,
|
|
394
394
|
get_pointer(s_sum_acc_ct1), work_group_size);
|
|
395
395
|
});
|
|
@@ -397,90 +397,78 @@ static void l2_norm_f32_sycl(const float* x, float* dst, const int ncols,
|
|
|
397
397
|
}
|
|
398
398
|
}
|
|
399
399
|
|
|
400
|
-
void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx,
|
|
401
|
-
ggml_tensor* dst, const float* src0_dd,
|
|
402
|
-
const float* src1_dd, float* dst_dd,
|
|
403
|
-
const queue_ptr& main_stream) {
|
|
400
|
+
void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
|
404
401
|
|
|
405
|
-
GGML_ASSERT(
|
|
402
|
+
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
|
406
403
|
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
407
404
|
|
|
408
|
-
const int64_t ne00 =
|
|
409
|
-
const int64_t nrows = ggml_nrows(
|
|
405
|
+
const int64_t ne00 = dst->src[0]->ne[0];
|
|
406
|
+
const int64_t nrows = ggml_nrows(dst->src[0]);
|
|
407
|
+
dpct::queue_ptr main_stream = ctx.stream();
|
|
408
|
+
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
|
409
|
+
const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
|
|
410
|
+
float * dst_dd = static_cast<float *>(dst->data);
|
|
410
411
|
|
|
411
412
|
float eps;
|
|
412
413
|
memcpy(&eps, dst->op_params, sizeof(float));
|
|
413
414
|
|
|
414
415
|
norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
|
|
415
|
-
|
|
416
|
-
(void)src1;
|
|
417
|
-
(void)dst;
|
|
418
|
-
(void)src1_dd;
|
|
419
416
|
}
|
|
420
417
|
|
|
421
|
-
void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx,
|
|
422
|
-
const ggml_tensor* src1, ggml_tensor* dst,
|
|
423
|
-
const float* src0_dd, const float* src1_dd,
|
|
424
|
-
float* dst_dd,
|
|
425
|
-
const queue_ptr& main_stream) {
|
|
418
|
+
void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
|
426
419
|
|
|
427
|
-
GGML_ASSERT(
|
|
420
|
+
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
|
428
421
|
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
429
422
|
|
|
430
423
|
int num_groups = dst->op_params[0];
|
|
424
|
+
dpct::queue_ptr main_stream = ctx.stream();
|
|
425
|
+
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
|
426
|
+
|
|
427
|
+
const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
|
|
428
|
+
float * dst_dd = static_cast<float *>(dst->data);
|
|
431
429
|
|
|
432
430
|
float eps;
|
|
433
431
|
memcpy(&eps, dst->op_params + 1, sizeof(float));
|
|
434
432
|
|
|
435
|
-
int group_size =
|
|
436
|
-
group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size,
|
|
437
|
-
|
|
438
|
-
(void)src1;
|
|
439
|
-
(void)dst;
|
|
440
|
-
(void)src1_dd;
|
|
441
|
-
GGML_UNUSED(ctx);
|
|
433
|
+
int group_size = dst->src[0]->ne[0] * dst->src[0]->ne[1] * ((dst->src[0]->ne[2] + num_groups - 1) / num_groups);
|
|
434
|
+
group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, dst->src[0]->ne[0] * dst->src[0]->ne[1] * dst->src[0]->ne[2], main_stream, ctx.device);
|
|
442
435
|
}
|
|
443
436
|
|
|
444
|
-
void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx,
|
|
445
|
-
const ggml_tensor* src1, ggml_tensor* dst,
|
|
446
|
-
const float* src0_dd, const float* src1_dd,
|
|
447
|
-
float* dst_dd,
|
|
448
|
-
const queue_ptr& main_stream) {
|
|
437
|
+
void ggml_sycl_op_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
449
438
|
|
|
450
|
-
GGML_ASSERT(
|
|
439
|
+
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
|
451
440
|
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
452
441
|
|
|
453
|
-
const int64_t ne00 =
|
|
454
|
-
const int64_t nrows = ggml_nrows(
|
|
442
|
+
const int64_t ne00 = dst->src[0]->ne[0];
|
|
443
|
+
const int64_t nrows = ggml_nrows(dst->src[0]);
|
|
444
|
+
dpct::queue_ptr main_stream = ctx.stream();
|
|
445
|
+
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
|
446
|
+
|
|
447
|
+
const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
|
|
448
|
+
float * dst_dd = static_cast<float *>(dst->data);
|
|
455
449
|
|
|
456
450
|
float eps;
|
|
457
451
|
memcpy(&eps, dst->op_params, sizeof(float));
|
|
458
452
|
|
|
459
453
|
rms_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
|
|
460
|
-
|
|
461
|
-
(void)src1;
|
|
462
|
-
(void)dst;
|
|
463
|
-
(void)src1_dd;
|
|
464
454
|
}
|
|
465
455
|
|
|
466
|
-
void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx,
|
|
467
|
-
const ggml_tensor* src1, ggml_tensor* dst,
|
|
468
|
-
const float* src0_dd, const float* src1_dd,
|
|
469
|
-
float* dst_dd,
|
|
470
|
-
const queue_ptr& main_stream) {
|
|
456
|
+
void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
|
471
457
|
|
|
472
|
-
GGML_ASSERT(
|
|
458
|
+
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
|
473
459
|
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
474
460
|
|
|
475
|
-
|
|
476
|
-
|
|
461
|
+
dpct::queue_ptr main_stream = ctx.stream();
|
|
462
|
+
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
|
463
|
+
|
|
464
|
+
const int64_t ne00 = dst->src[0]->ne[0];
|
|
465
|
+
const int64_t nrows = ggml_nrows(dst->src[0]);
|
|
466
|
+
const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
|
|
467
|
+
float * dst_dd = static_cast<float *>(dst->data);
|
|
477
468
|
|
|
478
469
|
float eps;
|
|
479
470
|
memcpy(&eps, dst->op_params, sizeof(float));
|
|
480
471
|
|
|
481
472
|
l2_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
|
|
482
473
|
|
|
483
|
-
(void)src1;
|
|
484
|
-
(void)dst;
|
|
485
|
-
(void)src1_dd;
|
|
486
474
|
}
|
|
@@ -15,27 +15,12 @@
|
|
|
15
15
|
|
|
16
16
|
#include "common.hpp"
|
|
17
17
|
|
|
18
|
-
void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx,
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
const float* src0_dd, const float* src1_dd,
|
|
26
|
-
float* dst_dd,
|
|
27
|
-
const queue_ptr& main_stream);
|
|
28
|
-
|
|
29
|
-
void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
|
|
30
|
-
const ggml_tensor* src1, ggml_tensor* dst,
|
|
31
|
-
const float* src0_dd, const float* src1_dd,
|
|
32
|
-
float* dst_dd,
|
|
33
|
-
const queue_ptr& main_stream);
|
|
34
|
-
|
|
35
|
-
void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
|
|
36
|
-
const ggml_tensor* src1, ggml_tensor* dst,
|
|
37
|
-
const float* src0_dd, const float* src1_dd,
|
|
38
|
-
float* dst_dd,
|
|
39
|
-
const queue_ptr& main_stream);
|
|
18
|
+
void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
|
|
19
|
+
|
|
20
|
+
void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
|
|
21
|
+
|
|
22
|
+
void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
|
|
23
|
+
|
|
24
|
+
void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
|
|
40
25
|
|
|
41
26
|
#endif // GGML_SYCL_NORM_HPP
|
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
#include <sycl/sycl.hpp>
|
|
2
|
-
#include <oneapi/mkl.hpp>
|
|
3
1
|
#include "outprod.hpp"
|
|
4
2
|
|
|
5
|
-
|
|
6
3
|
void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
|
7
4
|
const ggml_tensor *src0 = dst->src[0];
|
|
8
5
|
const ggml_tensor *src1 = dst->src[1];
|
|
@@ -34,20 +31,13 @@ void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
|
|
34
31
|
|
|
35
32
|
// Handle transposition of src1
|
|
36
33
|
const bool src1_T = ggml_is_transposed(src1);
|
|
37
|
-
const oneapi::
|
|
38
|
-
src1_T ? oneapi::mkl::transpose::nontrans : oneapi::mkl::transpose::trans;
|
|
34
|
+
const oneapi::math::transpose src1_op = src1_T ? oneapi::math::transpose::nontrans : oneapi::math::transpose::trans;
|
|
39
35
|
const int64_t ldb = (src1_T ? nb10 : nb11) / sizeof(float);
|
|
40
36
|
|
|
41
37
|
try {
|
|
42
|
-
// Perform matrix multiplication using
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
oneapi::mkl::transpose::nontrans, src1_op, ne0, ne1, ne01, alpha, src0_d,
|
|
46
|
-
ne00, src1_d, ldb, beta, dst_d, ne0);
|
|
47
|
-
#else
|
|
48
|
-
oneapi::mkl::blas::column_major::gemm(*stream, oneapi::mkl::transpose::nontrans, src1_op, ne0, ne1, ne01, alpha,
|
|
49
|
-
src0_d, ne00, src1_d, ldb, beta, dst_d, ne0);
|
|
50
|
-
#endif
|
|
38
|
+
// Perform matrix multiplication using oneMath GEMM
|
|
39
|
+
oneapi::math::blas::column_major::gemm(get_onemath_backend(*stream), oneapi::math::transpose::nontrans, src1_op,
|
|
40
|
+
ne0, ne1, ne01, alpha, src0_d, ne00, src1_d, ldb, beta, dst_d, ne0);
|
|
51
41
|
}
|
|
52
42
|
catch (sycl::exception const& exc) {
|
|
53
43
|
std::cerr << exc.what() << std::endl;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
//
|
|
2
|
+
// MIT license
|
|
3
|
+
// Copyright (C) 2025 Codeplay Software Ltd.
|
|
4
|
+
// Copyright (C) 2025 Intel Corporation
|
|
5
|
+
// SPDX-License-Identifier: MIT
|
|
6
|
+
//
|
|
7
|
+
|
|
8
|
+
//
|
|
9
|
+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
10
|
+
// See https://llvm.org/LICENSE.txt for license information.
|
|
11
|
+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
12
|
+
//
|
|
13
|
+
|
|
14
|
+
#ifndef GGML_SYCL_QUANTS_HPP
|
|
15
|
+
#define GGML_SYCL_QUANTS_HPP
|
|
16
|
+
|
|
17
|
+
#include "ggml-common.h"
|
|
18
|
+
#include "ggml.h"
|
|
19
|
+
|
|
20
|
+
namespace ggml_sycl_reordered {
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
// The reordered block moves quants (qs) and scales (d) to two
|
|
24
|
+
// uniform regions of memory that are contiguous in the same tensor.
|
|
25
|
+
// What this means is that instead of having:
|
|
26
|
+
// [d0, qs0] [d1, qs1] [d2, qs2] ... [dN, qsN]
|
|
27
|
+
// We have:
|
|
28
|
+
// [qs0, qs1, qs2, ..., qsN] [d0, d1, d2, ..., dN]
|
|
29
|
+
//
|
|
30
|
+
// Notes: out-of-bounds qs will run into d values
|
|
31
|
+
// Alignment relies on the allocated size of qs
|
|
32
|
+
|
|
33
|
+
template <ggml_type type> struct block_q_t;
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
// qk number of weights / quants in a block
|
|
37
|
+
// qr number of weights in a byte (described as 'before dequantization')
|
|
38
|
+
// for quantization types that have low and high bits split, qr is calculated with
|
|
39
|
+
// the lower bits, e.g. for Q6 quants QR6 is 2
|
|
40
|
+
// qi number of 32 bit integers needed to represent all the quants from a block (`qs` field)
|
|
41
|
+
// See ggml-common.h to see how these are calculated
|
|
42
|
+
template <> struct block_q_t<GGML_TYPE_Q4_0> {
|
|
43
|
+
struct traits {
|
|
44
|
+
static constexpr uint32_t qk = QK4_0;
|
|
45
|
+
static constexpr uint32_t qi = QI4_0;
|
|
46
|
+
static constexpr uint32_t qr = QR4_0;
|
|
47
|
+
static constexpr uint32_t vdr_mmvq = 2;
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
static constexpr int get_block_offset(const int block_index) { return block_index * (traits::qk / traits::qr); }
|
|
51
|
+
|
|
52
|
+
static constexpr int get_d_offset(int nrows, int ncols, const int block_index) {
|
|
53
|
+
return (ncols / traits::qr * nrows) + block_index * sizeof(ggml_half);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
template <> struct block_q_t<GGML_TYPE_Q4_K> {
|
|
60
|
+
struct traits {
|
|
61
|
+
static constexpr uint32_t qk = QK_K;
|
|
62
|
+
static constexpr uint32_t qi = QI4_K;
|
|
63
|
+
static constexpr uint32_t qr = QR4_K;
|
|
64
|
+
static constexpr uint32_t vdr_mmvq = 2;
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
static constexpr int get_block_offset(const int block_index) { return block_index * (traits::qk / traits::qr); }
|
|
68
|
+
|
|
69
|
+
static constexpr int get_d_offset(int nrows, int ncols, const int block_index) {
|
|
70
|
+
auto nblocks = (nrows * (ncols / traits::qk));
|
|
71
|
+
return (nblocks * QK_K / 2) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof(ggml_half2));
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
|
|
75
|
+
|
|
76
|
+
constexpr size_t get_total_qs_bytes(int nblocks) { return nblocks * QK_K / 2; }
|
|
77
|
+
|
|
78
|
+
constexpr size_t get_dm_offset(int nblocks) { return get_total_qs_bytes(nblocks) + nblocks * K_SCALE_SIZE; }
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
} // namespace ggml_sycl_reordered
|
|
82
|
+
|
|
83
|
+
#endif // GGML_SYCL_QUANTS_HPP
|