@fugood/llama.node 0.6.3 → 1.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +40 -30
- package/README.md +4 -1
- package/lib/binding.js +41 -29
- package/lib/binding.ts +26 -25
- package/package.json +40 -7
- package/scripts/build.js +47 -0
- package/scripts/llama.cpp.patch +109 -0
- package/src/anyascii.c +22223 -0
- package/src/anyascii.h +42 -0
- package/src/tts_utils.cpp +20 -7
- package/src/tts_utils.h +2 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
- package/src/llama.cpp/.github/workflows/build.yml +0 -1078
- package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
- package/src/llama.cpp/.github/workflows/docker.yml +0 -178
- package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
- package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
- package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
- package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
- package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
- package/src/llama.cpp/.github/workflows/release.yml +0 -739
- package/src/llama.cpp/.github/workflows/server.yml +0 -237
- package/src/llama.cpp/.github/workflows/winget.yml +0 -42
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
- package/src/llama.cpp/cmake/build-info.cmake +0 -64
- package/src/llama.cpp/cmake/common.cmake +0 -35
- package/src/llama.cpp/cmake/git-vars.cmake +0 -22
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
- package/src/llama.cpp/common/build-info.cpp.in +0 -4
- package/src/llama.cpp/docs/build.md +0 -561
- package/src/llama.cpp/examples/CMakeLists.txt +0 -43
- package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/batched/batched.cpp +0 -246
- package/src/llama.cpp/examples/chat-13B.bat +0 -57
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
- package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
- package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
- package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
- package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
- package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
- package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/simple/simple.cpp +0 -206
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
- package/src/llama.cpp/examples/sycl/build.sh +0 -23
- package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
- package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
- package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
- package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/training/finetune.cpp +0 -96
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
- package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
- package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
- package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
- package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
- package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
- package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
- package/src/llama.cpp/ggml/src/ggml.c +0 -6550
- package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
- package/src/llama.cpp/models/.editorconfig +0 -1
- package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
- package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
- package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
- package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
- package/src/llama.cpp/prompts/alpaca.txt +0 -1
- package/src/llama.cpp/prompts/assistant.txt +0 -31
- package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
- package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
- package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
- package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
- package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
- package/src/llama.cpp/prompts/chat.txt +0 -28
- package/src/llama.cpp/prompts/dan-modified.txt +0 -1
- package/src/llama.cpp/prompts/dan.txt +0 -1
- package/src/llama.cpp/prompts/mnemonics.txt +0 -93
- package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
- package/src/llama.cpp/prompts/reason-act.txt +0 -18
- package/src/llama.cpp/requirements/requirements-all.txt +0 -15
- package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
- package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
- package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
- package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
- package/src/llama.cpp/requirements.txt +0 -13
- package/src/llama.cpp/scripts/build-info.sh +0 -30
- package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
- package/src/llama.cpp/scripts/xxd.cmake +0 -16
- package/src/llama.cpp/tests/CMakeLists.txt +0 -177
- package/src/llama.cpp/tests/get-model.cpp +0 -21
- package/src/llama.cpp/tests/get-model.h +0 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
- package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
- package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
- package/src/llama.cpp/tests/test-barrier.cpp +0 -94
- package/src/llama.cpp/tests/test-c.c +0 -7
- package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
- package/src/llama.cpp/tests/test-chat.cpp +0 -985
- package/src/llama.cpp/tests/test-double-float.cpp +0 -57
- package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
- package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
- package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
- package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
- package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
- package/src/llama.cpp/tests/test-log.cpp +0 -39
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
- package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
- package/src/llama.cpp/tests/test-opt.cpp +0 -904
- package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
- package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
- package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
- package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
- package/src/llama.cpp/tests/test-rope.cpp +0 -262
- package/src/llama.cpp/tests/test-sampling.cpp +0 -399
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
- package/src/llama.cpp/tools/CMakeLists.txt +0 -39
- package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
- package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
- package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
- package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
- package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
- package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
- package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
- package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
- package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
- package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
- package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
- package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/main/main.cpp +0 -977
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
- package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
- package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
- package/src/llama.cpp/tools/mtmd/clip.h +0 -101
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
- package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
- package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
- package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
- package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
- package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
- package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
- package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
- package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
- package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
- package/src/llama.cpp/tools/run/run.cpp +0 -1261
- package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
- package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
- package/src/llama.cpp/tools/server/httplib.h +0 -10506
- package/src/llama.cpp/tools/server/server.cpp +0 -4966
- package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
- package/src/llama.cpp/tools/server/utils.hpp +0 -1337
- package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
- package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
|
@@ -1,791 +0,0 @@
|
|
|
1
|
-
//
|
|
2
|
-
// MIT license
|
|
3
|
-
// Copyright (C) 2024 Intel Corporation
|
|
4
|
-
// SPDX-License-Identifier: MIT
|
|
5
|
-
//
|
|
6
|
-
|
|
7
|
-
//
|
|
8
|
-
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
9
|
-
// See https://llvm.org/LICENSE.txt for license information.
|
|
10
|
-
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
11
|
-
//
|
|
12
|
-
|
|
13
|
-
#ifndef GGML_SYCL_DEQUANTIZE_HPP
|
|
14
|
-
#define GGML_SYCL_DEQUANTIZE_HPP
|
|
15
|
-
|
|
16
|
-
#include "common.hpp"
|
|
17
|
-
|
|
18
|
-
typedef void (*dequantize_kernel_t)(const void * vx, const int64_t ib, const int iqs, dfloat2 & v);
|
|
19
|
-
typedef void (*dequantize_kernel_t_reorder)(const void *d, const int64_t ib, const void *qs,
|
|
20
|
-
const int iqs, dfloat2 &v);
|
|
21
|
-
|
|
22
|
-
static __dpct_inline__ void dequantize_q4_0(const void *vx, const int64_t ib,
|
|
23
|
-
const int iqs, dfloat2 &v) {
|
|
24
|
-
const block_q4_0 * x = (const block_q4_0 *) vx;
|
|
25
|
-
|
|
26
|
-
const dfloat d = x[ib].d;
|
|
27
|
-
|
|
28
|
-
const int vui = x[ib].qs[iqs];
|
|
29
|
-
|
|
30
|
-
v.x() = vui & 0xF;
|
|
31
|
-
v.y() = vui >> 4;
|
|
32
|
-
|
|
33
|
-
#ifdef GGML_SYCL_F16
|
|
34
|
-
// v = v - {8.0f, 8.0f};
|
|
35
|
-
// v = v * {d, d};
|
|
36
|
-
v.s0() = (v.s0() - 8.0f) * d;
|
|
37
|
-
v.s1() = (v.s1() - 8.0f) * d;
|
|
38
|
-
|
|
39
|
-
#else
|
|
40
|
-
v.x() = (v.x() - 8.0f) * d;
|
|
41
|
-
v.y() = (v.y() - 8.0f) * d;
|
|
42
|
-
#endif // GGML_SYCL_F16
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
static __dpct_inline__ void dequantize_q4_0_reorder(const void *d_ptr, const int64_t ib, const void *qs,
|
|
46
|
-
const int iqs, dfloat2 &v) {
|
|
47
|
-
// const block_q4_0 * x = (const block_q4_0 *) vx;
|
|
48
|
-
|
|
49
|
-
const dfloat d = (const dfloat)*((const sycl::half*)d_ptr+ib);
|
|
50
|
-
|
|
51
|
-
const int vui = *((const uint8_t *)qs+iqs);
|
|
52
|
-
|
|
53
|
-
v.x() = vui & 0xF;
|
|
54
|
-
v.y() = vui >> 4;
|
|
55
|
-
|
|
56
|
-
#ifdef GGML_SYCL_F16
|
|
57
|
-
// v = v - {8.0f, 8.0f};
|
|
58
|
-
// v = v * {d, d};
|
|
59
|
-
v.s0() = (v.s0() - 8.0f) * d;
|
|
60
|
-
v.s1() = (v.s1() - 8.0f) * d;
|
|
61
|
-
|
|
62
|
-
#else
|
|
63
|
-
v.x() = (v.x() - 8.0f) * d;
|
|
64
|
-
v.y() = (v.y() - 8.0f) * d;
|
|
65
|
-
#endif // GGML_SYCL_F16
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
static __dpct_inline__ void dequantize_q4_1(const void *vx, const int64_t ib,
|
|
69
|
-
const int iqs, dfloat2 &v) {
|
|
70
|
-
const block_q4_1 * x = (const block_q4_1 *) vx;
|
|
71
|
-
|
|
72
|
-
const dfloat d = x[ib].dm[0];
|
|
73
|
-
const dfloat m = x[ib].dm[1];
|
|
74
|
-
|
|
75
|
-
const int vui = x[ib].qs[iqs];
|
|
76
|
-
|
|
77
|
-
v.x() = vui & 0xF;
|
|
78
|
-
v.y() = vui >> 4;
|
|
79
|
-
|
|
80
|
-
#ifdef GGML_SYCL_F16
|
|
81
|
-
// v = v * {d, d};
|
|
82
|
-
// v = v + {m, m};
|
|
83
|
-
v.s0() = sycl::fma(v.s0(), d, m);
|
|
84
|
-
v.s1() = sycl::fma(v.s1(), d, m);
|
|
85
|
-
|
|
86
|
-
#else
|
|
87
|
-
v.x() = sycl::fma(v.x(), d, m);
|
|
88
|
-
v.y() = sycl::fma(v.y(), d, m);
|
|
89
|
-
#endif // GGML_SYCL_F16
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
static __dpct_inline__ void dequantize_q5_0(const void *vx, const int64_t ib,
|
|
93
|
-
const int iqs, dfloat2 &v) {
|
|
94
|
-
const block_q5_0 * x = (const block_q5_0 *) vx;
|
|
95
|
-
|
|
96
|
-
const dfloat d = x[ib].d;
|
|
97
|
-
|
|
98
|
-
uint32_t qh;
|
|
99
|
-
memcpy(&qh, x[ib].qh, sizeof(qh));
|
|
100
|
-
|
|
101
|
-
const int xh_0 = ((qh >> (iqs + 0)) << 4) & 0x10;
|
|
102
|
-
const int xh_1 = ((qh >> (iqs + 12)) ) & 0x10;
|
|
103
|
-
|
|
104
|
-
v.x() = ((x[ib].qs[iqs] & 0xf) | xh_0);
|
|
105
|
-
v.y() = ((x[ib].qs[iqs] >> 4) | xh_1);
|
|
106
|
-
|
|
107
|
-
#ifdef GGML_SYCL_F16
|
|
108
|
-
// v = v - {16.0f, 16.0f};
|
|
109
|
-
// v = v * {d, d};
|
|
110
|
-
v.s0() = (v.s0() - 16.0f) * d;
|
|
111
|
-
v.s1() = (v.s1() - 16.0f) * d;
|
|
112
|
-
|
|
113
|
-
#else
|
|
114
|
-
v.x() = (v.x() - 16.0f) * d;
|
|
115
|
-
v.y() = (v.y() - 16.0f) * d;
|
|
116
|
-
#endif // GGML_SYCL_F16
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
static __dpct_inline__ void dequantize_q5_1(const void *vx, const int64_t ib,
|
|
120
|
-
const int iqs, dfloat2 &v) {
|
|
121
|
-
const block_q5_1 * x = (const block_q5_1 *) vx;
|
|
122
|
-
|
|
123
|
-
const dfloat d = x[ib].dm[0];
|
|
124
|
-
const dfloat m = x[ib].dm[1];
|
|
125
|
-
|
|
126
|
-
uint32_t qh;
|
|
127
|
-
memcpy(&qh, x[ib].qh, sizeof(qh));
|
|
128
|
-
|
|
129
|
-
const int xh_0 = ((qh >> (iqs + 0)) << 4) & 0x10;
|
|
130
|
-
const int xh_1 = ((qh >> (iqs + 12)) ) & 0x10;
|
|
131
|
-
|
|
132
|
-
v.x() = ((x[ib].qs[iqs] & 0xf) | xh_0);
|
|
133
|
-
v.y() = ((x[ib].qs[iqs] >> 4) | xh_1);
|
|
134
|
-
|
|
135
|
-
#ifdef GGML_SYCL_F16
|
|
136
|
-
// v = v * {d, d};
|
|
137
|
-
// v = v + {m, m};
|
|
138
|
-
v.s0() = sycl::fma(v.s0(), d, m);
|
|
139
|
-
v.s1() = sycl::fma(v.s1(), d, m);
|
|
140
|
-
#else
|
|
141
|
-
v.x() = sycl::fma(v.x(), d, m);
|
|
142
|
-
v.y() = sycl::fma(v.y(), d, m);
|
|
143
|
-
#endif // GGML_SYCL_F16
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
static __dpct_inline__ void dequantize_q8_0(const void *vx, const int64_t ib,
|
|
147
|
-
const int iqs, dfloat2 &v) {
|
|
148
|
-
const block_q8_0 * x = (const block_q8_0 *) vx;
|
|
149
|
-
|
|
150
|
-
const dfloat d = x[ib].d;
|
|
151
|
-
|
|
152
|
-
v.x() = x[ib].qs[iqs + 0];
|
|
153
|
-
v.y() = x[ib].qs[iqs + 1];
|
|
154
|
-
|
|
155
|
-
#ifdef GGML_SYCL_F16
|
|
156
|
-
// v = v * {d, d};
|
|
157
|
-
v.s0() *= d;
|
|
158
|
-
v.s1() *= d;
|
|
159
|
-
#else
|
|
160
|
-
v.x() *= d;
|
|
161
|
-
v.y() *= d;
|
|
162
|
-
#endif // GGML_SYCL_F16
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
template<typename dst_t>
|
|
166
|
-
static void dequantize_block_q4_0(const void * __restrict__ vx, dst_t * __restrict__ yy, int64_t nb32,
|
|
167
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
168
|
-
|
|
169
|
-
const int64_t i = item_ct1.get_group(2);
|
|
170
|
-
|
|
171
|
-
// assume 32 threads
|
|
172
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
173
|
-
const int64_t il = tid/8;
|
|
174
|
-
const int64_t ir = tid%8;
|
|
175
|
-
const int64_t ib = 8*i + ir;
|
|
176
|
-
if (ib >= nb32) {
|
|
177
|
-
return;
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
dst_t * y = yy + 256*i + 32*ir + 4*il;
|
|
181
|
-
|
|
182
|
-
const block_q4_0 * x = (const block_q4_0 *)vx + ib;
|
|
183
|
-
const float d = sycl::vec<sycl::half, 1>(x->d)
|
|
184
|
-
.convert<float, sycl::rounding_mode::automatic>()[0];
|
|
185
|
-
const float dm = -8*d;
|
|
186
|
-
|
|
187
|
-
const uint8_t * q = x->qs + 4*il;
|
|
188
|
-
|
|
189
|
-
for (int l = 0; l < 4; ++l) {
|
|
190
|
-
y[l+ 0] = d * (q[l] & 0xF) + dm;
|
|
191
|
-
y[l+16] = d * (q[l] >> 4) + dm;
|
|
192
|
-
}
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
template<typename dst_t>
|
|
196
|
-
static void dequantize_block_q4_0_reorder(const void * __restrict__ vx, dst_t * __restrict__ yy, int64_t nb32,
|
|
197
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
198
|
-
|
|
199
|
-
const int64_t i = item_ct1.get_group(2);
|
|
200
|
-
auto k=nb32;
|
|
201
|
-
// assume 32 threads
|
|
202
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
203
|
-
const int lane_ib = i * WARP_SIZE + tid;
|
|
204
|
-
|
|
205
|
-
if (lane_ib >= k / QK4_0) {
|
|
206
|
-
return;
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
dst_t * y_ptr = yy + lane_ib * QK4_0;
|
|
210
|
-
|
|
211
|
-
auto qs = (const uint8_t*)vx + lane_ib * QK4_0 / 2;
|
|
212
|
-
auto s_ptr = (const sycl::half*)((const uint8_t*)vx + k / 2) + lane_ib;
|
|
213
|
-
|
|
214
|
-
const float d = float(*s_ptr);
|
|
215
|
-
|
|
216
|
-
#pragma unroll
|
|
217
|
-
for (int l = 0; l < QK4_0 / 2; ++l) {
|
|
218
|
-
int vq = qs[l];
|
|
219
|
-
y_ptr[l + 0] = d * ((vq & 0xF) - 8);
|
|
220
|
-
y_ptr[l + 16] = d * ((vq >> 4) - 8);
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
template<typename dst_t>
|
|
226
|
-
static void dequantize_block_q4_1(const void * __restrict__ vx, dst_t * __restrict__ yy, int64_t nb32,
|
|
227
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
228
|
-
|
|
229
|
-
const int64_t i = item_ct1.get_group(2);
|
|
230
|
-
|
|
231
|
-
// assume 32 threads
|
|
232
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
233
|
-
const int64_t il = tid/8;
|
|
234
|
-
const int64_t ir = tid%8;
|
|
235
|
-
const int64_t ib = 8*i + ir;
|
|
236
|
-
if (ib >= nb32) {
|
|
237
|
-
return;
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
dst_t * y = yy + 256*i + 32*ir + 4*il;
|
|
241
|
-
|
|
242
|
-
const block_q4_1 * x = (const block_q4_1 *)vx + ib;
|
|
243
|
-
const sycl::float2 d =
|
|
244
|
-
x->dm.convert<float, sycl::rounding_mode::automatic>();
|
|
245
|
-
|
|
246
|
-
const uint8_t * q = x->qs + 4*il;
|
|
247
|
-
|
|
248
|
-
for (int l = 0; l < 4; ++l) {
|
|
249
|
-
y[l + 0] = d.x() * (q[l] & 0xF) + d.y();
|
|
250
|
-
y[l + 16] = d.x() * (q[l] >> 4) + d.y();
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
//================================== k-quants
|
|
256
|
-
|
|
257
|
-
template<typename dst_t>
|
|
258
|
-
static void dequantize_block_q2_K(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
|
259
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
260
|
-
|
|
261
|
-
const int64_t i = item_ct1.get_group(2);
|
|
262
|
-
const block_q2_K * x = (const block_q2_K *) vx;
|
|
263
|
-
|
|
264
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
265
|
-
#if QK_K == 256
|
|
266
|
-
const int64_t n = tid/32;
|
|
267
|
-
const int64_t l = tid - 32*n;
|
|
268
|
-
const int64_t is = 8*n + l/16;
|
|
269
|
-
|
|
270
|
-
const uint8_t q = x[i].qs[32*n + l];
|
|
271
|
-
dst_t * y = yy + i*QK_K + 128*n;
|
|
272
|
-
|
|
273
|
-
float dall = x[i].dm[0];
|
|
274
|
-
float dmin = x[i].dm[1];
|
|
275
|
-
y[l+ 0] = dall * (x[i].scales[is+0] & 0xF) * ((q >> 0) & 3) - dmin * (x[i].scales[is+0] >> 4);
|
|
276
|
-
y[l+32] = dall * (x[i].scales[is+2] & 0xF) * ((q >> 2) & 3) - dmin * (x[i].scales[is+2] >> 4);
|
|
277
|
-
y[l+64] = dall * (x[i].scales[is+4] & 0xF) * ((q >> 4) & 3) - dmin * (x[i].scales[is+4] >> 4);
|
|
278
|
-
y[l+96] = dall * (x[i].scales[is+6] & 0xF) * ((q >> 6) & 3) - dmin * (x[i].scales[is+6] >> 4);
|
|
279
|
-
#else
|
|
280
|
-
const int64_t is = tid/16; // 0 or 1
|
|
281
|
-
const int64_t il = tid%16; // 0...15
|
|
282
|
-
const uint8_t q = x[i].qs[il] >> (2*is);
|
|
283
|
-
dst_t * y = yy + i*QK_K + 16*is + il;
|
|
284
|
-
|
|
285
|
-
float dall = x[i].dm[0];
|
|
286
|
-
float dmin = x[i].dm[1];
|
|
287
|
-
y[ 0] = dall * (x[i].scales[is+0] & 0xF) * ((q >> 0) & 3) - dmin * (x[i].scales[is+0] >> 4);
|
|
288
|
-
y[32] = dall * (x[i].scales[is+2] & 0xF) * ((q >> 4) & 3) - dmin * (x[i].scales[is+2] >> 4);
|
|
289
|
-
#endif
|
|
290
|
-
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
template<typename dst_t>
|
|
294
|
-
static void dequantize_block_q3_K(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
|
295
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
296
|
-
|
|
297
|
-
const int64_t i = item_ct1.get_group(2);
|
|
298
|
-
const block_q3_K * x = (const block_q3_K *) vx;
|
|
299
|
-
|
|
300
|
-
#if QK_K == 256
|
|
301
|
-
const int64_t r = item_ct1.get_local_id(2) / 4;
|
|
302
|
-
const int64_t tid = r/2;
|
|
303
|
-
const int64_t is0 = r%2;
|
|
304
|
-
const int64_t l0 = 16 * is0 + 4 * (item_ct1.get_local_id(2) % 4);
|
|
305
|
-
const int64_t n = tid / 4;
|
|
306
|
-
const int64_t j = tid - 4*n;
|
|
307
|
-
|
|
308
|
-
uint8_t m = 1 << (4*n + j);
|
|
309
|
-
int64_t is = 8*n + 2*j + is0;
|
|
310
|
-
int shift = 2*j;
|
|
311
|
-
|
|
312
|
-
int8_t us = is < 4 ? (x[i].scales[is-0] & 0xF) | (((x[i].scales[is+8] >> 0) & 3) << 4) :
|
|
313
|
-
is < 8 ? (x[i].scales[is-0] & 0xF) | (((x[i].scales[is+4] >> 2) & 3) << 4) :
|
|
314
|
-
is < 12 ? (x[i].scales[is-8] >> 4) | (((x[i].scales[is+0] >> 4) & 3) << 4) :
|
|
315
|
-
(x[i].scales[is-8] >> 4) | (((x[i].scales[is-4] >> 6) & 3) << 4);
|
|
316
|
-
float d_all = x[i].d;
|
|
317
|
-
float dl = d_all * (us - 32);
|
|
318
|
-
|
|
319
|
-
dst_t * y = yy + i*QK_K + 128*n + 32*j;
|
|
320
|
-
const uint8_t * q = x[i].qs + 32*n;
|
|
321
|
-
const uint8_t * hm = x[i].hmask;
|
|
322
|
-
|
|
323
|
-
for (int l = l0; l < l0+4; ++l) y[l] = dl * ((int8_t)((q[l] >> shift) & 3) - ((hm[l] & m) ? 0 : 4));
|
|
324
|
-
#else
|
|
325
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
326
|
-
const int64_t is = tid/16; // 0 or 1
|
|
327
|
-
const int64_t il = tid%16; // 0...15
|
|
328
|
-
const int64_t im = il/8; // 0...1
|
|
329
|
-
const int64_t in = il%8; // 0...7
|
|
330
|
-
|
|
331
|
-
dst_t * y = yy + i*QK_K + 16*is + il;
|
|
332
|
-
|
|
333
|
-
const uint8_t q = x[i].qs[il] >> (2*is);
|
|
334
|
-
const uint8_t h = x[i].hmask[in] >> (2*is + im);
|
|
335
|
-
const float d = (float)x[i].d;
|
|
336
|
-
|
|
337
|
-
if (is == 0) {
|
|
338
|
-
y[ 0] = d * ((x[i].scales[0] & 0xF) - 8) * ((int8_t)((q >> 0) & 3) - ((h >> 0) & 1 ? 0 : 4));
|
|
339
|
-
y[32] = d * ((x[i].scales[1] & 0xF) - 8) * ((int8_t)((q >> 4) & 3) - ((h >> 4) & 1 ? 0 : 4));
|
|
340
|
-
} else {
|
|
341
|
-
y[ 0] = d * ((x[i].scales[0] >> 4) - 8) * ((int8_t)((q >> 0) & 3) - ((h >> 0) & 1 ? 0 : 4));
|
|
342
|
-
y[32] = d * ((x[i].scales[1] >> 4) - 8) * ((int8_t)((q >> 4) & 3) - ((h >> 4) & 1 ? 0 : 4));
|
|
343
|
-
}
|
|
344
|
-
#endif
|
|
345
|
-
|
|
346
|
-
}
|
|
347
|
-
|
|
348
|
-
#if QK_K == 256
|
|
349
|
-
// Unpacks the j-th 6-bit (scale, min) pair from the packed byte array `q`.
//
//   j < 4 : scale is the low 6 bits of q[j], min the low 6 bits of q[j + 4].
//   j >= 4: the low 4 bits of each value come from the two nibbles of
//           q[j + 4]; the top 2 bits come from bits 6..7 of q[j - 4] (scale)
//           and q[j] (min).
static inline void get_scale_min_k4(int j, const uint8_t * q, uint8_t & d, uint8_t & m) {
    if (j < 4) {
        d = q[j] & 63;
        m = q[j + 4] & 63;
        return;
    }

    const int scale_hi = (q[j - 4] >> 6) << 4;
    d = (q[j + 4] & 0xF) | scale_hi;

    const int min_hi = (q[j - 0] >> 6) << 4;
    m = (q[j + 4] >> 4) | min_hi;
}
|
|
358
|
-
#endif
|
|
359
|
-
|
|
360
|
-
template <typename dst_t>
|
|
361
|
-
inline void dequantize_q4_K_common(dst_t * __restrict__ y, const uint8_t * __restrict__ qs_ptr, const float dall,
|
|
362
|
-
const float dmin, uint8_t * __restrict__ scales_local, int il, int ir) {
|
|
363
|
-
const int is = 2 * il;
|
|
364
|
-
constexpr int n = 4;
|
|
365
|
-
|
|
366
|
-
uint8_t sc, m;
|
|
367
|
-
get_scale_min_k4(is + 0, scales_local, sc, m);
|
|
368
|
-
const float d1 = dall * sc;
|
|
369
|
-
const float m1 = dmin * m;
|
|
370
|
-
|
|
371
|
-
get_scale_min_k4(is + 1, scales_local, sc, m);
|
|
372
|
-
const float d2 = dall * sc;
|
|
373
|
-
const float m2 = dmin * m;
|
|
374
|
-
|
|
375
|
-
sycl::vec<uint8_t, n> q_vec = vec_aligned_load<uint8_t, n>(qs_ptr + 32 * il + n * ir);
|
|
376
|
-
for (int l = 0; l < n; ++l) {
|
|
377
|
-
y[l + 0] = d1 * (q_vec[l] & 0xF) - m1;
|
|
378
|
-
y[l + 32] = d2 * (q_vec[l] >> 4) - m2;
|
|
379
|
-
}
|
|
380
|
-
}
|
|
381
|
-
|
|
382
|
-
template<typename dst_t>
|
|
383
|
-
static void dequantize_block_q4_K(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
|
384
|
-
uint8_t* scales_local, const sycl::nd_item<3> &item_ct1) {
|
|
385
|
-
const block_q4_K * x = (const block_q4_K *) vx;
|
|
386
|
-
|
|
387
|
-
const int64_t i = item_ct1.get_group(2);
|
|
388
|
-
|
|
389
|
-
#if QK_K == 256
|
|
390
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
391
|
-
const int64_t il = tid / 8;
|
|
392
|
-
const int64_t ir = tid % 8;
|
|
393
|
-
|
|
394
|
-
dst_t * y = yy + i * QK_K + 64 * il + 4 * ir;
|
|
395
|
-
|
|
396
|
-
const sycl::half2 dm = x[i].dm;
|
|
397
|
-
const float dall = dm[0];
|
|
398
|
-
const float dmin = dm[1];
|
|
399
|
-
|
|
400
|
-
if (tid < 12) {
|
|
401
|
-
scales_local[tid] = x[i].scales[tid];
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
item_ct1.barrier(sycl::access::fence_space::local_space);
|
|
405
|
-
dequantize_q4_K_common(y, x[i].qs, dall, dmin, scales_local, il, ir);
|
|
406
|
-
#else
|
|
407
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
408
|
-
const uint8_t * q = x[i].qs;
|
|
409
|
-
dst_t * y = yy + i*QK_K;
|
|
410
|
-
const float d = (float)x[i].dm[0];
|
|
411
|
-
const float m = (float)x[i].dm[1];
|
|
412
|
-
y[tid+ 0] = d * (x[i].scales[0] & 0xF) * (q[tid] & 0xF) - m * (x[i].scales[0] >> 4);
|
|
413
|
-
y[tid+32] = d * (x[i].scales[1] & 0xF) * (q[tid] >> 4) - m * (x[i].scales[1] >> 4);
|
|
414
|
-
#endif
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
template <typename dst_t>
|
|
418
|
-
static void dequantize_block_q4_K_reorder(const void * __restrict__ vx, dst_t * __restrict__ yy, uint8_t * scales_local,
|
|
419
|
-
const sycl::nd_item<1> & item_ct1, int64_t nb) {
|
|
420
|
-
const int64_t i = item_ct1.get_group(0); // block index
|
|
421
|
-
const int64_t tid = item_ct1.get_local_id(0); // thread index within block
|
|
422
|
-
const int64_t il = tid / 8;
|
|
423
|
-
const int64_t ir = tid % 8;
|
|
424
|
-
|
|
425
|
-
dst_t * y = yy + i * QK_K + 64 * il + 4 * ir;
|
|
426
|
-
|
|
427
|
-
const uint8_t * base = static_cast<const uint8_t *>(vx);
|
|
428
|
-
const size_t qs_offset = i * (QK_K / 2);
|
|
429
|
-
const size_t scales_offset = nb * (QK_K / 2) + i * K_SCALE_SIZE;
|
|
430
|
-
const size_t dm_offset = nb * (QK_K / 2) + nb * K_SCALE_SIZE + i * sizeof(ggml_half2);
|
|
431
|
-
|
|
432
|
-
const uint8_t * qs_ptr = base + qs_offset;
|
|
433
|
-
const uint8_t * scales_ptr = base + scales_offset;
|
|
434
|
-
ggml_half2 dm_values = *reinterpret_cast<const ggml_half2 *>(base + dm_offset);
|
|
435
|
-
|
|
436
|
-
const float dall = dm_values.x();
|
|
437
|
-
const float dmin = dm_values.y();
|
|
438
|
-
|
|
439
|
-
if (tid < 12) {
|
|
440
|
-
scales_local[tid] = scales_ptr[tid];
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
item_ct1.barrier(sycl::access::fence_space::local_space);
|
|
444
|
-
dequantize_q4_K_common(y, qs_ptr, dall, dmin, scales_local, il, ir);
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
template<typename dst_t>
|
|
448
|
-
static void dequantize_block_q5_K(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
|
449
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
450
|
-
const block_q5_K * x = (const block_q5_K *) vx;
|
|
451
|
-
|
|
452
|
-
const int64_t i = item_ct1.get_group(2);
|
|
453
|
-
|
|
454
|
-
#if QK_K == 256
|
|
455
|
-
// assume 64 threads - this is very slightly better than the one below
|
|
456
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
457
|
-
const int64_t il = tid/16; // il is in 0...3
|
|
458
|
-
const int64_t ir = tid%16; // ir is in 0...15
|
|
459
|
-
const int64_t is = 2*il; // is is in 0...6
|
|
460
|
-
|
|
461
|
-
dst_t * y = yy + i*QK_K + 64*il + 2*ir;
|
|
462
|
-
|
|
463
|
-
const float dall = x[i].dm[0];
|
|
464
|
-
const float dmin = x[i].dm[1];
|
|
465
|
-
|
|
466
|
-
const uint8_t * ql = x[i].qs + 32*il + 2*ir;
|
|
467
|
-
const uint8_t * qh = x[i].qh + 2*ir;
|
|
468
|
-
|
|
469
|
-
uint8_t sc, m;
|
|
470
|
-
get_scale_min_k4(is + 0, x[i].scales, sc, m);
|
|
471
|
-
const float d1 = dall * sc; const float m1 = dmin * m;
|
|
472
|
-
get_scale_min_k4(is + 1, x[i].scales, sc, m);
|
|
473
|
-
const float d2 = dall * sc; const float m2 = dmin * m;
|
|
474
|
-
|
|
475
|
-
uint8_t hm = 1 << (2*il);
|
|
476
|
-
y[ 0] = d1 * ((ql[ 0] & 0xF) + (qh[ 0] & hm ? 16 : 0)) - m1;
|
|
477
|
-
y[ 1] = d1 * ((ql[ 1] & 0xF) + (qh[ 1] & hm ? 16 : 0)) - m1;
|
|
478
|
-
hm <<= 1;
|
|
479
|
-
y[32] = d2 * ((ql[ 0] >> 4) + (qh[ 0] & hm ? 16 : 0)) - m2;
|
|
480
|
-
y[33] = d2 * ((ql[ 1] >> 4) + (qh[ 1] & hm ? 16 : 0)) - m2;
|
|
481
|
-
#else
|
|
482
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
483
|
-
const uint8_t q = x[i].qs[tid];
|
|
484
|
-
const int64_t im = tid/8; // 0...3
|
|
485
|
-
const int64_t in = tid%8; // 0...7
|
|
486
|
-
const int64_t is = tid/16; // 0 or 1
|
|
487
|
-
const uint8_t h = x[i].qh[in] >> im;
|
|
488
|
-
const float d = x[i].d;
|
|
489
|
-
dst_t * y = yy + i*QK_K + tid;
|
|
490
|
-
y[ 0] = d * x[i].scales[is+0] * ((q & 0xF) - ((h >> 0) & 1 ? 0 : 16));
|
|
491
|
-
y[32] = d * x[i].scales[is+2] * ((q >> 4) - ((h >> 4) & 1 ? 0 : 16));
|
|
492
|
-
#endif
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
template<typename dst_t>
|
|
496
|
-
static void dequantize_block_q6_K(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
|
497
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
498
|
-
const block_q6_K * x = (const block_q6_K *) vx;
|
|
499
|
-
|
|
500
|
-
const int64_t i = item_ct1.get_group(2);
|
|
501
|
-
#if QK_K == 256
|
|
502
|
-
|
|
503
|
-
// assume 64 threads - this is very slightly better than the one below
|
|
504
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
505
|
-
const int64_t ip = tid/32; // ip is 0 or 1
|
|
506
|
-
const int64_t il = tid - 32*ip; // 0...32
|
|
507
|
-
const int64_t is = 8*ip + il/16;
|
|
508
|
-
|
|
509
|
-
dst_t * y = yy + i*QK_K + 128*ip + il;
|
|
510
|
-
|
|
511
|
-
const float d = x[i].d;
|
|
512
|
-
|
|
513
|
-
const uint8_t * ql = x[i].ql + 64*ip + il;
|
|
514
|
-
const uint8_t qh = x[i].qh[32*ip + il];
|
|
515
|
-
const int8_t * sc = x[i].scales + is;
|
|
516
|
-
|
|
517
|
-
y[ 0] = d * sc[0] * ((int8_t)((ql[ 0] & 0xF) | (((qh >> 0) & 3) << 4)) - 32);
|
|
518
|
-
y[32] = d * sc[2] * ((int8_t)((ql[32] & 0xF) | (((qh >> 2) & 3) << 4)) - 32);
|
|
519
|
-
y[64] = d * sc[4] * ((int8_t)((ql[ 0] >> 4) | (((qh >> 4) & 3) << 4)) - 32);
|
|
520
|
-
y[96] = d * sc[6] * ((int8_t)((ql[32] >> 4) | (((qh >> 6) & 3) << 4)) - 32);
|
|
521
|
-
#else
|
|
522
|
-
|
|
523
|
-
// assume 32 threads
|
|
524
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
525
|
-
const int64_t ip = tid/16; // 0 or 1
|
|
526
|
-
const int64_t il = tid - 16*ip; // 0...15
|
|
527
|
-
|
|
528
|
-
dst_t * y = yy + i*QK_K + 16*ip + il;
|
|
529
|
-
|
|
530
|
-
const float d = x[i].d;
|
|
531
|
-
|
|
532
|
-
const uint8_t ql = x[i].ql[16*ip + il];
|
|
533
|
-
const uint8_t qh = x[i].qh[il] >> (2*ip);
|
|
534
|
-
const int8_t * sc = x[i].scales;
|
|
535
|
-
|
|
536
|
-
y[ 0] = d * sc[ip+0] * ((int8_t)((ql & 0xF) | (((qh >> 0) & 3) << 4)) - 32);
|
|
537
|
-
y[32] = d * sc[ip+2] * ((int8_t)((ql >> 4) | (((qh >> 4) & 3) << 4)) - 32);
|
|
538
|
-
#endif
|
|
539
|
-
}
|
|
540
|
-
|
|
541
|
-
template<typename dst_t>
|
|
542
|
-
static void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
|
543
|
-
const sycl::nd_item<3> &item_ct1,
|
|
544
|
-
const uint64_t *iq2xxs_grid_ptr,
|
|
545
|
-
const uint8_t *ksigns_iq2xs_ptr,
|
|
546
|
-
const uint8_t *kmask_iq2xs_ptr) {
|
|
547
|
-
|
|
548
|
-
const int64_t i = item_ct1.get_group(2);
|
|
549
|
-
const block_iq2_xxs * x = (const block_iq2_xxs *) vx;
|
|
550
|
-
|
|
551
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
552
|
-
#if QK_K == 256
|
|
553
|
-
const int64_t il = tid/8; // 0...3
|
|
554
|
-
const int64_t ib = tid%8; // 0...7
|
|
555
|
-
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
|
556
|
-
const uint16_t * q2 = x[i].qs + 4*ib;
|
|
557
|
-
const uint8_t * aux8 = (const uint8_t *)q2;
|
|
558
|
-
const uint8_t * grid = (const uint8_t *)(iq2xxs_grid_ptr + aux8[il]);
|
|
559
|
-
const uint32_t aux32 = q2[2] | (q2[3] << 16);
|
|
560
|
-
const float d = (float)x[i].d * (0.5f + (aux32 >> 28)) * 0.25f;
|
|
561
|
-
const uint8_t signs = ksigns_iq2xs_ptr[(aux32 >> 7*il) & 127];
|
|
562
|
-
for (int j = 0; j < 8; ++j) y[j] = d * grid[j] * (signs & kmask_iq2xs_ptr[j] ? -1.f : 1.f);
|
|
563
|
-
#else
|
|
564
|
-
assert(false);
|
|
565
|
-
#endif
|
|
566
|
-
|
|
567
|
-
}
|
|
568
|
-
|
|
569
|
-
template<typename dst_t>
|
|
570
|
-
static void dequantize_block_iq2_xs(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
|
571
|
-
const sycl::nd_item<3> &item_ct1,
|
|
572
|
-
const uint64_t *iq2xs_grid,
|
|
573
|
-
const uint8_t *ksigns_iq2xs,
|
|
574
|
-
const uint8_t *kmask_iq2xs) {
|
|
575
|
-
|
|
576
|
-
const int64_t i = item_ct1.get_group(2);
|
|
577
|
-
const block_iq2_xs * x = (const block_iq2_xs *) vx;
|
|
578
|
-
|
|
579
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
580
|
-
#if QK_K == 256
|
|
581
|
-
const int64_t il = tid/8; // 0...3
|
|
582
|
-
const int64_t ib = tid%8; // 0...7
|
|
583
|
-
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
|
584
|
-
const uint16_t * q2 = x[i].qs + 4*ib;
|
|
585
|
-
const uint8_t * grid = (const uint8_t *)(iq2xs_grid + (q2[il] & 511));
|
|
586
|
-
const float d = (float)x[i].d * (0.5f + ((x[i].scales[ib] >> 4*(il/2)) & 0xf)) * 0.25f;
|
|
587
|
-
const uint8_t signs = ksigns_iq2xs[q2[il] >> 9];
|
|
588
|
-
for (int j = 0; j < 8; ++j) y[j] = d * grid[j] * (signs & kmask_iq2xs[j] ? -1.f : 1.f);
|
|
589
|
-
#else
|
|
590
|
-
assert(false);
|
|
591
|
-
#endif
|
|
592
|
-
|
|
593
|
-
}
|
|
594
|
-
|
|
595
|
-
template <typename dst_t>
|
|
596
|
-
__dpct_inline__ static void
|
|
597
|
-
dequantize_block_iq2_s(const void *__restrict__ vx, dst_t *__restrict__ yy,
|
|
598
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
599
|
-
|
|
600
|
-
const int64_t i = item_ct1.get_group(2);
|
|
601
|
-
const block_iq2_s * x = (const block_iq2_s *) vx;
|
|
602
|
-
|
|
603
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
604
|
-
#if QK_K == 256
|
|
605
|
-
const int64_t il = tid/8; // 0...3
|
|
606
|
-
const int64_t ib = tid%8; // 0...7
|
|
607
|
-
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
|
608
|
-
const uint8_t * grid = (const uint8_t *)(iq2s_grid + (x[i].qs[4*ib+il] | ((x[i].qh[ib] << (8-2*il)) & 0x300)));
|
|
609
|
-
const float d = (float)x[i].d * (0.5f + ((x[i].scales[ib] >> 4*(il/2)) & 0xf)) * 0.25f;
|
|
610
|
-
const uint8_t signs = x[i].qs[QK_K/8+4*ib+il];
|
|
611
|
-
#pragma unroll
|
|
612
|
-
for (int j = 0; j < 8; ++j)
|
|
613
|
-
y[j] = d * grid[j] * (signs & kmask_iq2xs[j] ? -1.f : 1.f);
|
|
614
|
-
#else
|
|
615
|
-
assert(false);
|
|
616
|
-
|
|
617
|
-
#endif
|
|
618
|
-
|
|
619
|
-
}
|
|
620
|
-
|
|
621
|
-
template<typename dst_t>
|
|
622
|
-
static void dequantize_block_iq3_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
|
623
|
-
const sycl::nd_item<3> &item_ct1,
|
|
624
|
-
const uint32_t *iq3xxs_grid,
|
|
625
|
-
const uint8_t *ksigns_iq2xs,
|
|
626
|
-
const uint8_t *kmask_iq2xs) {
|
|
627
|
-
|
|
628
|
-
const int64_t i = item_ct1.get_group(2);
|
|
629
|
-
const block_iq3_xxs * x = (const block_iq3_xxs *) vx;
|
|
630
|
-
|
|
631
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
632
|
-
#if QK_K == 256
|
|
633
|
-
const int64_t il = tid/8; // 0...3
|
|
634
|
-
const int64_t ib = tid%8; // 0...7
|
|
635
|
-
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
|
636
|
-
const uint8_t * q3 = x[i].qs + 8*ib;
|
|
637
|
-
const uint16_t * gas = (const uint16_t *)(x[i].qs + QK_K/4) + 2*ib;
|
|
638
|
-
const uint8_t * grid1 = (const uint8_t *)(iq3xxs_grid + q3[2*il+0]);
|
|
639
|
-
const uint8_t * grid2 = (const uint8_t *)(iq3xxs_grid + q3[2*il+1]);
|
|
640
|
-
const uint32_t aux32 = gas[0] | (gas[1] << 16);
|
|
641
|
-
const float d = (float)x[i].d * (0.5f + (aux32 >> 28)) * 0.5f;
|
|
642
|
-
const uint8_t signs = ksigns_iq2xs[(aux32 >> 7*il) & 127];
|
|
643
|
-
for (int j = 0; j < 4; ++j) {
|
|
644
|
-
y[j+0] = d * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f);
|
|
645
|
-
y[j+4] = d * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f);
|
|
646
|
-
}
|
|
647
|
-
#else
|
|
648
|
-
assert(false);
|
|
649
|
-
#endif
|
|
650
|
-
|
|
651
|
-
}
|
|
652
|
-
|
|
653
|
-
template <typename dst_t>
|
|
654
|
-
__dpct_inline__ static void
|
|
655
|
-
dequantize_block_iq3_s(const void *__restrict__ vx, dst_t *__restrict__ yy,
|
|
656
|
-
const sycl::nd_item<3> &item_ct1,
|
|
657
|
-
const uint8_t *kmask_iq2xs, const uint32_t *iq3s_grid) {
|
|
658
|
-
|
|
659
|
-
const int64_t i = item_ct1.get_group(2);
|
|
660
|
-
const block_iq3_s * x = (const block_iq3_s *) vx;
|
|
661
|
-
|
|
662
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
663
|
-
#if QK_K == 256
|
|
664
|
-
const int64_t il = tid/8; // 0...3
|
|
665
|
-
const int64_t ib = tid%8; // 0...7
|
|
666
|
-
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
|
667
|
-
const uint8_t * qs = x[i].qs + 8*ib;
|
|
668
|
-
const uint8_t * grid1 = (const uint8_t *)(iq3s_grid + (qs[2*il+0] | ((x[i].qh[ib] << (8-2*il)) & 256)));
|
|
669
|
-
const uint8_t * grid2 = (const uint8_t *)(iq3s_grid + (qs[2*il+1] | ((x[i].qh[ib] << (7-2*il)) & 256)));
|
|
670
|
-
const float d = (float)x[i].d * (1 + 2*((x[i].scales[ib/2] >> 4*(ib%2)) & 0xf));
|
|
671
|
-
const uint8_t signs = x[i].signs[4*ib + il];
|
|
672
|
-
#pragma unroll
|
|
673
|
-
for (int j = 0; j < 4; ++j) {
|
|
674
|
-
y[j+0] = d * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f);
|
|
675
|
-
y[j+4] = d * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f);
|
|
676
|
-
}
|
|
677
|
-
#else
|
|
678
|
-
assert(false);
|
|
679
|
-
#endif
|
|
680
|
-
|
|
681
|
-
}
|
|
682
|
-
|
|
683
|
-
template <typename dst_t>
|
|
684
|
-
__dpct_inline__ static void
|
|
685
|
-
dequantize_block_iq1_s(const void *__restrict__ vx, dst_t *__restrict__ yy,
|
|
686
|
-
const sycl::nd_item<3> &item_ct1,
|
|
687
|
-
const uint32_t *iq1s_grid_gpu) {
|
|
688
|
-
|
|
689
|
-
const int64_t i = item_ct1.get_group(2);
|
|
690
|
-
const block_iq1_s * x = (const block_iq1_s *) vx;
|
|
691
|
-
|
|
692
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
693
|
-
#if QK_K == 256
|
|
694
|
-
const int64_t il = tid/8; // 0...3
|
|
695
|
-
const int64_t ib = tid%8; // 0...7
|
|
696
|
-
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
|
697
|
-
const float delta = x[i].qh[ib] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA;
|
|
698
|
-
const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 7) + 1);
|
|
699
|
-
uint32_t grid32[2]; const int8_t * q = (const int8_t *)grid32;
|
|
700
|
-
grid32[0] = iq1s_grid_gpu[x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)];
|
|
701
|
-
grid32[1] = (grid32[0] >> 4) & 0x0f0f0f0f;
|
|
702
|
-
grid32[0] &= 0x0f0f0f0f;
|
|
703
|
-
#pragma unroll
|
|
704
|
-
for (int j = 0; j < 8; ++j) {
|
|
705
|
-
y[j] = d * (q[j] + delta);
|
|
706
|
-
}
|
|
707
|
-
#else
|
|
708
|
-
assert(false);
|
|
709
|
-
#endif
|
|
710
|
-
|
|
711
|
-
}
|
|
712
|
-
|
|
713
|
-
template <typename dst_t>
|
|
714
|
-
__dpct_inline__ static void
|
|
715
|
-
dequantize_block_iq1_m(const void *__restrict__ vx, dst_t *__restrict__ yy,
|
|
716
|
-
const sycl::nd_item<3> &item_ct1,
|
|
717
|
-
const uint32_t *iq1s_grid_gpu) {
|
|
718
|
-
|
|
719
|
-
const int64_t i = item_ct1.get_group(2);
|
|
720
|
-
const block_iq1_m * x = (const block_iq1_m *) vx;
|
|
721
|
-
|
|
722
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
723
|
-
#if QK_K == 256
|
|
724
|
-
const int64_t il = tid/8; // 0...3
|
|
725
|
-
const int64_t ib = tid%8; // 0...7
|
|
726
|
-
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
|
727
|
-
const uint16_t * sc = (const uint16_t *)x[i].scales;
|
|
728
|
-
iq1m_scale_t scale;
|
|
729
|
-
scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
|
|
730
|
-
const int ib16 = 2*ib + il/2; // sc[ib16/4] >> 3*(ib16%4) -> sc[ib/2] >> 3*((2*ib+il/2)%4);
|
|
731
|
-
const float d = (float)scale.f16 * (2*((sc[ib16/4] >> 3*(ib16%4)) & 0x7) + 1);
|
|
732
|
-
const float delta = x[i].qh[2*ib+il/2] & (0x08 << 4*(il%2)) ? -1 - IQ1M_DELTA : -1 + IQ1M_DELTA;
|
|
733
|
-
uint32_t grid32[2]; const int8_t * q = (const int8_t *)grid32;
|
|
734
|
-
grid32[0] = iq1s_grid_gpu[x[i].qs[4*ib+il] | (((x[i].qh[2*ib+il/2] >> 4*(il%2)) & 7) << 8)];
|
|
735
|
-
grid32[1] = (grid32[0] >> 4) & 0x0f0f0f0f;
|
|
736
|
-
grid32[0] &= 0x0f0f0f0f;
|
|
737
|
-
#pragma unroll
|
|
738
|
-
for (int j = 0; j < 8; ++j) {
|
|
739
|
-
y[j] = d * (q[j] + delta);
|
|
740
|
-
}
|
|
741
|
-
#else
|
|
742
|
-
assert(false);
|
|
743
|
-
#endif
|
|
744
|
-
|
|
745
|
-
}
|
|
746
|
-
|
|
747
|
-
template <typename dst_t>
|
|
748
|
-
__dpct_inline__ static void
|
|
749
|
-
dequantize_block_iq4_nl(const void *__restrict__ vx, dst_t *__restrict__ yy,
|
|
750
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
751
|
-
|
|
752
|
-
const int64_t i = item_ct1.get_group(2);
|
|
753
|
-
const block_iq4_nl * x = (const block_iq4_nl *) vx + i*(QK_K/QK4_NL);
|
|
754
|
-
|
|
755
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
756
|
-
const int64_t il = tid/8; // 0...3
|
|
757
|
-
const int64_t ib = tid%8; // 0...7
|
|
758
|
-
dst_t * y = yy + i*QK_K + 32*ib + 4*il;
|
|
759
|
-
const uint8_t * q4 = x[ib].qs + 4*il;
|
|
760
|
-
const float d = (float)x[ib].d;
|
|
761
|
-
#pragma unroll
|
|
762
|
-
for (int j = 0; j < 4; ++j) {
|
|
763
|
-
y[j+ 0] = d * kvalues_iq4nl[q4[j] & 0xf];
|
|
764
|
-
y[j+16] = d * kvalues_iq4nl[q4[j] >> 4];
|
|
765
|
-
}
|
|
766
|
-
|
|
767
|
-
}
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
template <typename dst_t>
|
|
771
|
-
__dpct_inline__ static void
|
|
772
|
-
dequantize_block_iq4_xs(const void *__restrict__ vx, dst_t *__restrict__ yy,
|
|
773
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
774
|
-
const int64_t i = item_ct1.get_group(2);
|
|
775
|
-
const block_iq4_xs * x = (const block_iq4_xs *)vx;
|
|
776
|
-
|
|
777
|
-
const int64_t tid = item_ct1.get_local_id(2);
|
|
778
|
-
const int64_t il = tid/8; // 0...3
|
|
779
|
-
const int64_t ib = tid%8; // 0...7
|
|
780
|
-
dst_t * y = yy + i*QK_K + 32*ib + 4*il;
|
|
781
|
-
const uint8_t * q4 = x[i].qs + 16*ib + 4*il;
|
|
782
|
-
const float d = (float)x[i].d * ((((x[i].scales_l[ib/2] >> 4*(ib%2)) & 0xf) | (((x[i].scales_h >> 2*ib) & 3) << 4)) - 32);
|
|
783
|
-
#pragma unroll
|
|
784
|
-
for (int j = 0; j < 4; ++j) {
|
|
785
|
-
y[j+ 0] = d * kvalues_iq4nl[q4[j] & 0xf];
|
|
786
|
-
y[j+16] = d * kvalues_iq4nl[q4[j] >> 4];
|
|
787
|
-
}
|
|
788
|
-
}
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
#endif // GGML_SYCL_DEQUANTIZE_HPP
|