llama-cpp-bin 9093.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_cpp_bin-9093.0.0/.github/scripts/generate-pep503-index.py +67 -0
- llama_cpp_bin-9093.0.0/.github/workflows/build-everything.yml +131 -0
- llama_cpp_bin-9093.0.0/.github/workflows/build-wheels-cpu.yml +140 -0
- llama_cpp_bin-9093.0.0/.github/workflows/build-wheels-cuda.yml +215 -0
- llama_cpp_bin-9093.0.0/.github/workflows/build-wheels-macos.yml +121 -0
- llama_cpp_bin-9093.0.0/.github/workflows/build-wheels-rocm.yml +213 -0
- llama_cpp_bin-9093.0.0/.github/workflows/build-wheels-vulkan.yml +160 -0
- llama_cpp_bin-9093.0.0/.gitignore +24 -0
- llama_cpp_bin-9093.0.0/.gitmodules +3 -0
- llama_cpp_bin-9093.0.0/LICENSE +661 -0
- llama_cpp_bin-9093.0.0/MANIFEST.in +16 -0
- llama_cpp_bin-9093.0.0/PKG-INFO +69 -0
- llama_cpp_bin-9093.0.0/README.md +53 -0
- llama_cpp_bin-9093.0.0/llama.cpp/CMakeLists.txt +290 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/arm64-linux-clang.cmake +17 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/build-info.cmake +48 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/common.cmake +58 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/download-models.cmake +21 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/git-vars.cmake +22 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/license.cmake +40 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/llama-config.cmake.in +30 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/llama.pc.in +10 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- llama_cpp_bin-9093.0.0/llama.cpp/cmake/x64-windows-llvm.cmake +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/CMakeLists.txt +172 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/arg.cpp +4107 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/arg.h +133 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/base64.hpp +392 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/build-info.cpp.in +35 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/build-info.h +11 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/chat-auto-parser-generator.cpp +470 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/chat-auto-parser-helpers.cpp +364 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/chat-auto-parser-helpers.h +74 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/chat-auto-parser.h +438 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/chat-diff-analyzer.cpp +1395 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/chat-peg-parser.cpp +1033 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/chat-peg-parser.h +198 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/chat.cpp +2393 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/chat.h +293 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/common.cpp +1962 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/common.h +1028 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/console.cpp +1166 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/console.h +46 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/debug.cpp +190 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/debug.h +31 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/download.cpp +958 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/download.h +107 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/fit.cpp +959 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/fit.h +32 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/hf-cache.cpp +772 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/hf-cache.h +36 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/http.h +99 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/caps.cpp +479 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/caps.h +32 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/lexer.cpp +341 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/lexer.h +157 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/parser.cpp +602 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/parser.h +21 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/runtime.cpp +906 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/runtime.h +652 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/string.cpp +213 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/string.h +61 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/utils.h +149 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/value.cpp +1484 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/jinja/value.h +759 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/json-partial.cpp +324 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/json-partial.h +39 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/json-schema-to-grammar.cpp +1189 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/json-schema-to-grammar.h +43 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/llguidance.cpp +258 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/log.cpp +453 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/log.h +123 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/ngram-cache.cpp +285 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/ngram-cache.h +101 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/ngram-map.cpp +530 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/ngram-map.h +115 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/ngram-mod.cpp +60 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/ngram-mod.h +38 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/peg-parser.cpp +2128 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/peg-parser.h +523 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/preset.cpp +483 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/preset.h +83 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/reasoning-budget.cpp +250 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/reasoning-budget.h +42 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/regex-partial.cpp +204 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/regex-partial.h +56 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/sampling.cpp +843 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/sampling.h +119 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/speculative.cpp +1248 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/speculative.h +46 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/unicode.cpp +124 -0
- llama_cpp_bin-9093.0.0/llama.cpp/common/unicode.h +30 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/CMakeLists.txt +45 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/batched/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/batched/batched.cpp +264 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +945 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/debug/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/debug/debug.cpp +261 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +38 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/diffusion/CMakeLists.txt +10 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/diffusion/diffusion-cli.cpp +268 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/diffusion/diffusion.cpp +408 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/diffusion/diffusion.h +57 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/embedding/embedding.cpp +414 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/eval-callback/CMakeLists.txt +26 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/eval-callback/eval-callback.cpp +88 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gen-docs/gen-docs.cpp +145 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf/gguf.cpp +273 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf-hash/CMakeLists.txt +22 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/gguf-hash/gguf-hash.cpp +697 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/idle/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/idle/idle.cpp +110 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/llama.android/lib/src/main/cpp/CMakeLists.txt +56 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/llama.android/lib/src/main/cpp/ai_chat.cpp +565 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/llama.android/lib/src/main/cpp/logging.h +61 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/lookahead/lookahead.cpp +483 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/lookup/lookup-create.cpp +45 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/lookup/lookup-merge.cpp +50 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/lookup/lookup.cpp +245 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/model-conversion/requirements.txt +7 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/parallel/parallel.cpp +520 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/passkey/passkey.cpp +277 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/retrieval/retrieval.cpp +307 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/save-load-state/save-load-state.cpp +320 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/simple/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/simple/simple.cpp +223 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/simple-chat/simple-chat.cpp +210 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/speculative/speculative.cpp +660 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/speculative-simple/speculative-simple.cpp +348 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/sycl/ls-sycl-device.cpp +15 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/training/CMakeLists.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/examples/training/finetune.cpp +101 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/CMakeLists.txt +504 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/cmake/FindNCCL.cmake +36 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/cmake/common.cmake +50 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/cmake/ggml-config.cmake.in +191 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-alloc.h +85 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-backend.h +431 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-blas.h +25 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-cann.h +123 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-cpp.h +39 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-cpu.h +151 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-cuda.h +50 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-hexagon.h +19 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-metal.h +61 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-openvino.h +37 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-opt.h +256 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-rpc.h +35 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-sycl.h +49 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-virtgpu.h +14 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-vulkan.h +29 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-zdnn.h +17 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml-zendnn.h +22 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/ggml.h +2840 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/include/gguf.h +204 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/CMakeLists.txt +493 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-alloc.c +1248 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-backend-dl.cpp +48 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-backend-dl.h +45 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-backend-impl.h +275 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-backend-meta.cpp +2143 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-backend-reg.cpp +586 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-backend.cpp +2371 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +522 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +89 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +195 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +349 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +4436 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +1190 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cann/common.h +651 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3062 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-common.h +1900 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +718 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +249 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/amx/common.h +115 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4245 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +5156 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2158 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2304 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +4553 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1703 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1465 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1220 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3970 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6407 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +348 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/common.h +95 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +539 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +3821 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +703 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/hbm.h +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +939 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1513 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4051 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/ops.cpp +11348 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/ops.h +119 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/quants.c +1288 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/quants.h +103 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/repack.cpp +4836 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/repack.h +245 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/simd-gemm.h +226 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1319 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/traits.h +38 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cpu/vec.h +1588 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +268 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +28 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +304 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +150 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/CMakeLists.txt +118 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3678 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +78 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/act-ops.c +782 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +293 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +872 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/cumsum-ops.c +270 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/diag-ops.c +216 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/fill-ops.c +123 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +727 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/gated-delta-net-ops.c +955 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +124 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +372 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +86 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +137 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c +1840 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +1790 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hmx-ops.h +71 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hmx-profile.h +34 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hmx-queue.c +158 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hmx-queue.h +134 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hmx-utils.h +202 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +111 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +180 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +308 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +262 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +291 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +216 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +142 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/main.c +879 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +3173 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/repeat-ops.c +148 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +494 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +184 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +407 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/solve-tri-ops.c +267 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/ssm-conv.c +340 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +637 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/vtcm-utils.h +16 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +157 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-impl.h +783 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +2018 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.h +296 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1175 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4606 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.h +97 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/ggml-metal.cpp +950 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +10627 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +124 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +179 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +14969 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/ggml-decoder.cpp +985 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +380 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/ggml-openvino.cpp +1132 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/ggml-quants.cpp +956 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/rope.cpp +149 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/unary_gelu.cpp +25 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op_table.cpp +47 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/op_table.h +40 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/rt_info/weightless_caching_attributes.hpp +41 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/translate_session.cpp +317 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/utils.cpp +257 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/openvino/utils.h +86 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/utils.cpp +880 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-openvino/utils.h +143 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-opt.cpp +1094 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-quants.c +5491 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-quants.h +112 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +33 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +1974 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-rpc/transport.cpp +683 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-rpc/transport.h +34 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +178 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/add-id.cpp +81 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/add-id.hpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/backend.hpp +48 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +346 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/common.cpp +83 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/common.hpp +998 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/concat.cpp +202 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/concat.hpp +20 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/conv.cpp +101 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/conv.hpp +20 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/convert.cpp +825 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/convert.hpp +64 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +602 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +223 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/cumsum.cpp +148 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/cumsum.hpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +975 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/diag.cpp +67 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/diag.hpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1579 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3774 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1124 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +94 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/fattn-buffers.cpp +56 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/fattn-buffers.hpp +63 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/fattn-common.hpp +1181 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/fattn-tile.cpp +59 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/fattn-tile.hpp +1246 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/fattn-vec.hpp +674 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/fattn.cpp +227 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/fattn.hpp +22 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/fill.cpp +55 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/fill.hpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/gated_delta_net.cpp +307 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/gated_delta_net.hpp +9 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +93 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +219 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +20 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5451 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/gla.cpp +106 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +136 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +21 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3030 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1380 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +43 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/norm.cpp +656 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/norm.hpp +28 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +47 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +10 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/pad.cpp +97 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/pad.hpp +24 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/presets.hpp +79 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/quantize.hpp +133 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/quants.hpp +156 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/roll.cpp +122 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/roll.hpp +20 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/rope.cpp +641 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/rope.hpp +26 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/set.cpp +73 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/set.hpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +240 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +426 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/solve_tri.cpp +172 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/solve_tri.hpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/ssm_conv.cpp +132 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/ssm_scan.cpp +156 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/ssm_scan.hpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +67 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +38 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq512-dv512.cpp +6 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +73 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +20 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/type.hpp +112 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/upscale.cpp +410 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/upscale.hpp +9 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1508 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +293 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-threading.h +14 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +123 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +71 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +99 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +545 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-virtgpu/virtgpu.h +115 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +220 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +17110 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +1203 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +3094 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +4384 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +637 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +650 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml.c +7775 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/ggml.cpp +26 -0
- llama_cpp_bin-9093.0.0/llama.cpp/ggml/src/gguf.cpp +1556 -0
- llama_cpp_bin-9093.0.0/llama.cpp/include/llama-cpp.h +30 -0
- llama_cpp_bin-9093.0.0/llama.cpp/include/llama.h +1568 -0
- llama_cpp_bin-9093.0.0/llama.cpp/pocs/CMakeLists.txt +14 -0
- llama_cpp_bin-9093.0.0/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
- llama_cpp_bin-9093.0.0/llama.cpp/pocs/vdot/q8dot.cpp +173 -0
- llama_cpp_bin-9093.0.0/llama.cpp/pocs/vdot/vdot.cpp +311 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-all.txt +18 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-compare-llama-bench.txt +3 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +9 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +1 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-convert_legacy_llama.txt +7 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +4 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-pydantic.txt +3 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-server-bench.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- llama_cpp_bin-9093.0.0/llama.cpp/requirements.txt +13 -0
- llama_cpp_bin-9093.0.0/llama.cpp/scripts/jinja/requirements.txt +2 -0
- llama_cpp_bin-9093.0.0/llama.cpp/scripts/snapdragon/qdc/requirements.txt +22 -0
- llama_cpp_bin-9093.0.0/llama.cpp/scripts/xxd.cmake +16 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/CMakeLists.txt +60 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-adapter.cpp +497 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-adapter.h +91 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-arch.cpp +909 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-arch.h +639 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-batch.cpp +919 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-batch.h +173 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-chat.cpp +939 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-chat.h +74 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-context.cpp +3826 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-context.h +368 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-cparams.cpp +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-cparams.h +47 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-ext.h +90 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-grammar.cpp +1510 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-grammar.h +194 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-graph.cpp +2929 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-graph.h +1064 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-hparams.cpp +258 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-hparams.h +359 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-impl.cpp +171 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-impl.h +75 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-io.cpp +20 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-io.h +35 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-kv-cache-iswa.cpp +330 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-kv-cache-iswa.h +137 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-kv-cache.cpp +2502 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-kv-cache.h +420 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-kv-cells.h +533 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-memory-hybrid-iswa.h +140 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-memory-hybrid.cpp +268 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-memory-hybrid.h +139 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-memory-recurrent.cpp +1167 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-memory-recurrent.h +182 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-memory.cpp +59 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-memory.h +122 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-mmap.cpp +779 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-mmap.h +74 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-model-loader.cpp +1695 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-model-loader.h +207 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-model-saver.cpp +415 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-model-saver.h +44 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-model.cpp +2518 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-model.h +707 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-quant.cpp +1407 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-quant.h +1 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-sampler.cpp +3885 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-sampler.h +42 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-vocab.cpp +4115 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama-vocab.h +189 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/llama.cpp +578 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/afmoe.cpp +285 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/apertus.cpp +169 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/arcee.cpp +157 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/arctic.cpp +180 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/arwkv7.cpp +202 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/baichuan.cpp +155 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/bailingmoe.cpp +180 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/bailingmoe2.cpp +219 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/bert.cpp +233 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/bitnet.cpp +170 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/bloom.cpp +151 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/chameleon.cpp +204 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/chatglm.cpp +160 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/codeshell.cpp +152 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/cogvlm.cpp +157 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/cohere2.cpp +159 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/command-r.cpp +143 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/dbrx.cpp +154 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/deci.cpp +190 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/deepseek.cpp +194 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/deepseek2.cpp +438 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/deepseek2ocr.cpp +82 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/delta-net-base.cpp +445 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/dots1.cpp +192 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/dream.cpp +137 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/ernie4-5-moe.cpp +133 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/ernie4-5.cpp +164 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/eurobert.cpp +124 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/exaone-moe.cpp +246 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/exaone.cpp +136 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/exaone4.cpp +176 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/falcon-h1.cpp +209 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/falcon.cpp +161 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/gemma-embedding.cpp +178 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/gemma.cpp +139 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/gemma2.cpp +177 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/gemma3.cpp +225 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/gemma3n.cpp +459 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/gemma4.cpp +457 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/glm-dsa.cpp +155 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/glm4-moe.cpp +284 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/glm4.cpp +194 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/gpt2.cpp +147 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/gptneox.cpp +218 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/granite-hybrid.cpp +302 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/granite-moe.cpp +89 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/granite.cpp +274 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/grok.cpp +223 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/grovemoe.cpp +193 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/hunyuan-dense.cpp +6 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/hunyuan-moe.cpp +187 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/hunyuan-vl.cpp +189 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/internlm2.cpp +138 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/jais.cpp +132 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/jais2.cpp +161 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/jamba.cpp +198 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/jina-bert-v2.cpp +66 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/jina-bert-v3.cpp +69 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/kimi-linear.cpp +550 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/lfm2.cpp +275 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/lfm2moe.cpp +85 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/llada-moe.cpp +162 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/llada.cpp +156 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/llama-embed.cpp +6 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/llama.cpp +247 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/llama4.cpp +273 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/maincoder.cpp +150 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/mamba-base.cpp +289 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/mamba.cpp +137 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/mamba2.cpp +87 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/mimo2.cpp +240 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/minicpm.cpp +89 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/minicpm3.cpp +260 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/minimax-m2.cpp +167 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/mistral3.cpp +231 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/mistral4.cpp +6 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/models.h +1860 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/modern-bert.cpp +165 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/mpt.cpp +170 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/nemotron-h-moe.cpp +6 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/nemotron-h.cpp +258 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/nemotron.cpp +149 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/neo-bert.cpp +134 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/nomic-bert-moe.cpp +72 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/nomic-bert.cpp +72 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/olmo.cpp +142 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/olmo2.cpp +211 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/olmoe.cpp +173 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/openai-moe.cpp +169 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/openelm.cpp +171 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/orion.cpp +141 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/paddleocr.cpp +107 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/pangu-embed.cpp +161 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/phi2.cpp +142 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/phi3.cpp +196 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/phimoe.cpp +55 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/plamo.cpp +136 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/plamo2.cpp +422 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/plamo3.cpp +197 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/plm.cpp +213 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen.cpp +140 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen2.cpp +153 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen2moe.cpp +193 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen2vl.cpp +143 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen3.cpp +156 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen35.cpp +473 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen35moe.cpp +527 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen3moe.cpp +177 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen3next.cpp +633 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen3vl.cpp +172 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/qwen3vlmoe.cpp +189 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/refact.cpp +159 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/rnd1.cpp +176 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/rwkv6-base.cpp +164 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/rwkv6.cpp +185 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/rwkv6qwen2.cpp +167 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/rwkv7-base.cpp +137 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/rwkv7.cpp +211 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/seed-oss.cpp +150 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/smallthinker.cpp +190 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/smollm3.cpp +152 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/stablelm.cpp +172 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/starcoder.cpp +144 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/starcoder2.cpp +157 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/step35.cpp +269 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/t5.cpp +370 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/t5encoder.cpp +44 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/wavtokenizer-dec.cpp +264 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/models/xverse.cpp +135 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/unicode-data.cpp +7034 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/unicode-data.h +20 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/unicode.cpp +1275 -0
- llama_cpp_bin-9093.0.0/llama.cpp/src/unicode.h +111 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/CMakeLists.txt +302 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/export-graph-ops.cpp +226 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/get-model.cpp +21 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/get-model.h +2 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/gguf-model-data.cpp +740 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/gguf-model-data.h +50 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/peg-parser/simple-tokenize.cpp +37 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/peg-parser/simple-tokenize.h +6 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/peg-parser/test-basic.cpp +471 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/peg-parser/test-gbnf-generation.cpp +370 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/peg-parser/test-json-parser.cpp +109 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/peg-parser/test-json-serialization.cpp +28 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/peg-parser/test-python-dict-parser.cpp +318 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/peg-parser/test-unicode.cpp +446 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/peg-parser/tests.h +25 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-alloc.cpp +608 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-arg-parser.cpp +212 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-autorelease.cpp +24 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-backend-ops.cpp +9730 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-backend-sampler.cpp +1166 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-barrier.cpp +236 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-c.c +3 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-chat-auto-parser.cpp +1969 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-chat-peg-parser.cpp +983 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-chat-template.cpp +712 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-chat.cpp +4772 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-double-float.cpp +57 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-gbnf-validator.cpp +109 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-gguf-model-data.cpp +154 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-gguf.cpp +1365 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-grammar-integration.cpp +1493 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-grammar-llguidance.cpp +1204 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-grammar-parser.cpp +537 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-jinja.cpp +2514 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-json-partial.cpp +287 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-json-schema-to-grammar.cpp +1589 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-llama-archs.cpp +663 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-llama-grammar.cpp +406 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-log.cpp +43 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-mtmd-c-api.c +65 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-opt.cpp +1003 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-peg-parser.cpp +26 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-quant-type-selection.cpp +520 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-quantize-fns.cpp +196 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-quantize-perf.cpp +356 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-quantize-stats.cpp +427 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-reasoning-budget.cpp +260 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-regex-partial.cpp +288 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-rope.cpp +263 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-sampling.cpp +400 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-state-restore-fragmented.cpp +125 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-thread-safety.cpp +164 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-tokenizer-0.cpp +312 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-tokenizer-1-bpe.cpp +155 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/test-tokenizer-1-spm.cpp +125 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tests/testing.h +243 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/CMakeLists.txt +42 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/batched-bench/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/batched-bench/batched-bench.cpp +259 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/cli/CMakeLists.txt +10 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/cli/cli.cpp +652 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/completion/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/completion/completion.cpp +1003 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/cvector-generator/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/cvector-generator/completions.txt +582 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/cvector-generator/cvector-generator.cpp +515 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/cvector-generator/mean.hpp +48 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/cvector-generator/negative.txt +4 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/cvector-generator/pca.hpp +315 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/cvector-generator/positive.txt +4 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/export-lora/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/export-lora/export-lora.cpp +439 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/fit-params/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/fit-params/fit-params.cpp +74 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/gguf-split/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/gguf-split/gguf-split.cpp +589 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/imatrix/CMakeLists.txt +13 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/imatrix/imatrix.cpp +1318 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/llama-bench/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/llama-bench/llama-bench.cpp +2431 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/CMakeLists.txt +123 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/clip-graph.h +122 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/clip-impl.h +716 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/clip-model.h +586 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/clip.cpp +4220 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/clip.h +118 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/debug/mtmd-debug.cpp +232 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/debug/mtmd-debug.h +17 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/deprecation-warning.cpp +25 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/cogvlm.cpp +98 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/conformer.cpp +216 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/deepseekocr.cpp +324 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/dotsocr.cpp +49 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/gemma4a.cpp +288 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/gemma4v.cpp +151 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/glm4v.cpp +122 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/granite-speech.cpp +275 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/hunyuanocr.cpp +73 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/internvl.cpp +69 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/kimik25.cpp +101 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/kimivl.cpp +63 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/llama4.cpp +96 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/llava.cpp +374 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/minicpmv.cpp +405 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/mobilenetv5.cpp +451 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/models.h +183 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/nemotron-v2-vl.cpp +35 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/paddleocr.cpp +52 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/pixtral.cpp +86 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/qwen2vl.cpp +183 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/qwen3a.cpp +68 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/qwen3vl.cpp +193 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/siglip.cpp +94 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/step3vl.cpp +81 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/whisper-enc.cpp +137 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/yasa2.cpp +191 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/models/youtuvl.cpp +179 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/mtmd-audio.cpp +943 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/mtmd-audio.h +132 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/mtmd-cli.cpp +443 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/mtmd-helper.cpp +537 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/mtmd-helper.h +100 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/mtmd-image.cpp +1429 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/mtmd-image.h +179 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/mtmd.cpp +1525 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/mtmd.h +332 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/requirements.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/tests/test-1-extracted.txt +42 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/mtmd/tests/tests-requirements.txt +5 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/parser/CMakeLists.txt +20 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/parser/debug-template-parser.cpp +462 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/parser/template-analysis.cpp +611 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/perplexity/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/perplexity/perplexity.cpp +2096 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/quantize/CMakeLists.txt +9 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/quantize/quantize.cpp +756 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/results/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/results/results.cpp +183 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/rpc/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/rpc/rpc-server.cpp +342 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/CMakeLists.txt +75 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/bench/requirements.txt +2 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-chat.cpp +630 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-chat.h +26 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-common.cpp +1586 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-common.h +373 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-context.cpp +4359 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-context.h +150 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-cors-proxy.h +67 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-http.cpp +700 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-http.h +94 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-models.cpp +1565 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-models.h +233 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-queue.cpp +450 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-queue.h +205 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-task.cpp +2127 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-task.h +645 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-tools.cpp +817 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server-tools.h +26 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/server.cpp +363 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/server/tests/requirements.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/tokenize/CMakeLists.txt +7 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/tokenize/tokenize.cpp +419 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/tts/CMakeLists.txt +8 -0
- llama_cpp_bin-9093.0.0/llama.cpp/tools/tts/tts.cpp +1096 -0
- llama_cpp_bin-9093.0.0/llama.cpp/vendor/cpp-httplib/CMakeLists.txt +195 -0
- llama_cpp_bin-9093.0.0/llama.cpp/vendor/cpp-httplib/httplib.cpp +16253 -0
- llama_cpp_bin-9093.0.0/llama.cpp/vendor/cpp-httplib/httplib.h +3860 -0
- llama_cpp_bin-9093.0.0/llama.cpp/vendor/miniaudio/miniaudio.h +95864 -0
- llama_cpp_bin-9093.0.0/llama.cpp/vendor/nlohmann/json.hpp +25526 -0
- llama_cpp_bin-9093.0.0/llama.cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- llama_cpp_bin-9093.0.0/llama.cpp/vendor/sheredom/subprocess.h +1203 -0
- llama_cpp_bin-9093.0.0/llama.cpp/vendor/stb/stb_image.h +7988 -0
- llama_cpp_bin-9093.0.0/pyproject.toml +42 -0
- llama_cpp_bin-9093.0.0/setup.cfg +4 -0
- llama_cpp_bin-9093.0.0/setup.py +142 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin/__init__.py +51 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin/_version.py +24 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin/bin/.gitkeep +0 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin/cli.py +103 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin/core.py +64 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin.egg-info/PKG-INFO +69 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin.egg-info/SOURCES.txt +981 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin.egg-info/dependency_links.txt +1 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin.egg-info/entry_points.txt +2 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin.egg-info/requires.txt +4 -0
- llama_cpp_bin-9093.0.0/src/llama_cpp_bin.egg-info/top_level.txt +2 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import urllib.request
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Required settings, read from the process environment. A missing variable
# raises KeyError as soon as the module is loaded. Lookup order is REPO, TAG,
# GITHUB_TOKEN.
REPO, TAG, TOKEN = (os.environ[key] for key in ("REPO", "TAG", "GITHUB_TOKEN"))

# GitHub REST endpoint for the release attached to TAG in REPO.
API_URL = f"https://api.github.com/repos/{REPO}/releases/tags/{TAG}"

# Headers attached to the API request built in fetch_release().
HEADERS = dict(
    (
        ("Authorization", f"Bearer {TOKEN}"),
        ("Accept", "application/vnd.github+json"),
        ("X-GitHub-Api-Version", "2022-11-28"),
    )
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def fetch_release(timeout=30):
    """Download and decode the release description for the configured tag.

    Sends a GET request to ``API_URL`` with ``HEADERS`` and parses the
    response body as JSON.

    Args:
        timeout: Socket timeout in seconds passed to ``urlopen``. Without a
            timeout a stalled connection would block the job indefinitely.

    Returns:
        The decoded JSON document (a dict for a successful release lookup).

    Raises:
        urllib.error.URLError: on connection failures, timeouts or HTTP
            errors (``HTTPError`` is a subclass).
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    req = urllib.request.Request(API_URL, headers=HEADERS)
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def generate_index(release=None):
    """Write a two-page PEP 503 "simple" index under ``./site/simple``.

    ``site/simple/index.html`` lists the single project and
    ``site/simple/llama-cpp-bin/index.html`` links to every ``.whl`` and
    ``.tar.gz`` asset of the release.

    Args:
        release: Optional release description (a dict with an ``"assets"``
            list whose items carry ``"name"`` and ``"browser_download_url"``).
            When omitted, the release is fetched with ``fetch_release()``.

    Side effects:
        Creates the ``site/simple/llama-cpp-bin`` directory tree relative to
        the current working directory, overwrites both ``index.html`` files
        and prints a one-line summary.
    """
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    import html

    if release is None:
        release = fetch_release()

    # Only distribution files belong in the index; other release assets are
    # ignored.
    assets = [
        a
        for a in release.get("assets", [])
        if a["name"].endswith((".whl", ".tar.gz"))
    ]

    site = Path("site")
    simple = site / "simple"
    pkg_dir = simple / "llama-cpp-bin"
    pkg_dir.mkdir(parents=True, exist_ok=True)

    root_html = (
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<body>\n"
        '<a href="llama-cpp-bin/">llama-cpp-bin</a>\n'
        "</body>\n"
        "</html>"
    )
    (simple / "index.html").write_text(root_html, encoding="utf-8")

    # Escape both the attribute value and the link text so that characters
    # such as & < > " in an asset name or URL cannot break the markup.
    links = [
        '<a href="{}">{}</a>'.format(
            html.escape(asset["browser_download_url"], quote=True),
            html.escape(asset["name"]),
        )
        for asset in assets
    ]

    pkg_html = (
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<body>\n"
        + "\n".join(links)
        + "\n</body>\n"
        "</html>"
    )
    (pkg_dir / "index.html").write_text(pkg_html, encoding="utf-8")

    print(f"Generated index with {len(assets)} assets")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Script entry point: build the index only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    generate_index()
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# Release orchestrator: builds every wheel flavour through the reusable
# workflows, builds and publishes the sdist, then regenerates the PEP 503 index.
#
# The ref to build is the pushed tag name on `push` events and the `version`
# input on manual dispatch; the same expression is repeated wherever it is
# needed.
name: Build Everything

on:
  push:
    tags:
      - 'v*'
  workflow_dispatch:
    inputs:
      version:
        description: 'Git ref to build (tag recommended for releases, e.g. v0.1.0)'
        required: true
        type: string

permissions:
  contents: write

jobs:
  build_cuda_wheels_windows:
    name: CUDA Wheels Windows
    uses: ./.github/workflows/build-wheels-cuda.yml
    with:
      version: ${{ github.event_name == 'push' && github.ref_name || inputs.version }}
      config: 'os:windows-2022'

  build_cuda_wheels_linux:
    name: CUDA Wheels Linux
    uses: ./.github/workflows/build-wheels-cuda.yml
    with:
      version: ${{ github.event_name == 'push' && github.ref_name || inputs.version }}
      config: 'os:ubuntu-22.04'

  build_vulkan_wheels_windows:
    name: Vulkan Wheels Windows
    uses: ./.github/workflows/build-wheels-vulkan.yml
    with:
      version: ${{ github.event_name == 'push' && github.ref_name || inputs.version }}
      config: 'os:windows-2022'

  build_vulkan_wheels_linux:
    name: Vulkan Wheels Linux
    uses: ./.github/workflows/build-wheels-vulkan.yml
    with:
      version: ${{ github.event_name == 'push' && github.ref_name || inputs.version }}
      config: 'os:ubuntu-22.04'

  build_wheels_cpu:
    name: CPU-only Wheels
    uses: ./.github/workflows/build-wheels-cpu.yml
    with:
      version: ${{ github.event_name == 'push' && github.ref_name || inputs.version }}

  build_wheels_macos:
    name: MacOS Wheels
    uses: ./.github/workflows/build-wheels-macos.yml
    with:
      version: ${{ github.event_name == 'push' && github.ref_name || inputs.version }}

  build_wheels_rocm:
    name: ROCm Wheels
    uses: ./.github/workflows/build-wheels-rocm.yml
    with:
      version: ${{ github.event_name == 'push' && github.ref_name || inputs.version }}

  build-sdist:
    name: Build Source Distribution
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          # Check out the ref being released. Without this, a manual dispatch
          # would build the sdist from the branch the workflow was started on
          # while uploading it under the `inputs.version` tag below.
          ref: ${{ github.event_name == 'push' && github.ref_name || inputs.version }}
          submodules: 'recursive'
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
      - run: python -m pip install build
      - run: python -m build --sdist
      # The artifact is consumed by the publish-pypi job.
      - uses: actions/upload-artifact@v6
        with:
          name: sdist
          path: dist/*.tar.gz
      - name: Upload sdist to release
        uses: svenstaro/upload-release-action@2.7.0
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ./dist/*.tar.gz
          tag: ${{ github.event_name == 'push' && github.ref_name || inputs.version }}
          file_glob: true
          overwrite: true

  publish-pypi:
    name: Publish to PyPI
    needs: build-sdist
    runs-on: ubuntu-latest
    environment: pypi
    # Job-level permissions replace the workflow-level block for this job.
    permissions:
      id-token: write
    steps:
      - uses: actions/download-artifact@v6
        with:
          name: sdist
          path: dist
      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist

  generate-index:
    name: Generate PEP 503 Index
    # Runs only after every wheel workflow has finished, because the script
    # lists the assets attached to the release.
    needs:
      - build_wheels_cpu
      - build_wheels_macos
      - build_cuda_wheels_linux
      - build_cuda_wheels_windows
      - build_wheels_rocm
      - build_vulkan_wheels_linux
      - build_vulkan_wheels_windows
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
      - name: Generate index
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          TAG: ${{ github.event_name == 'push' && github.ref_name || inputs.version }}
        run: python .github/scripts/generate-pep503-index.py
      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v4
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./site
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Builds CPU-only wheels. Callable manually or from another workflow.
#
# `config` overrides matrix axes ("key:v1,v2;key2:v3"); `exclude` adds matrix
# exclusions. The sentinel values "Default" / "None" leave the matrix untouched.
name: Build CPU Wheels

on:
  workflow_dispatch:
    inputs:
      version:
        description: "Git ref to build (tag recommended for releases, e.g. v0.1.0)"
        type: string
        required: true
      config:
        description: "Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2"
        type: string
        required: false
        default: "Default"
      exclude:
        description: "Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2"
        type: string
        required: false
        default: "None"
  workflow_call:
    inputs:
      version:
        description: "Git ref to build"
        type: string
        required: true
      config:
        description: "Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2"
        type: string
        required: false
        default: "Default"
      exclude:
        description: "Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2"
        type: string
        required: false
        default: "None"

permissions:
  contents: write

jobs:
  # Computes the build matrix as JSON so the inputs can reshape it.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}

    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          # Default axes.
          $matrix = @{
              'os' = @('ubuntu-22.04', 'windows-2022')
              'pyver' = @("3.11")
          }

          # Each "key:v1,v2" entry replaces the whole axis named `key`.
          if ($env:CONFIGIN -ne 'Default') {
              foreach ($override in $env:CONFIGIN.Split(';')) {
                  $pair = $override.Split(':')
                  $matrix[$pair[0]] = $pair[1].Split(',')
              }
          }

          # Each ";"-separated group becomes one exclusion object.
          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: ${{ matrix.os }} CPU
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    steps:
      - uses: actions/checkout@v6
        with:
          repository: "vladlearns/llama-cpp-bin"
          ref: ${{ inputs.version }}
          submodules: "recursive"

      - name: Install Ninja
        if: runner.os == 'Windows'
        run: choco install ninja

      - name: Setup MSVC
        if: runner.os == 'Windows'
        uses: ilammy/msvc-dev-cmd@v1

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}

      - name: Install Dependencies
        run: |
          if ($IsLinux) {
            sudo apt-get update
            sudo apt-get install -y build-essential libssl-dev
          }
          python -m pip install build

      - name: Build Wheel
        run: |
          $env:VERBOSE = '1'

          # Assemble the CMake flags as a list, then join them with spaces.
          $cmakeFlags = @(
            '-DCMAKE_BUILD_TYPE=Release'
            '-DGGML_NATIVE=off'
            '-DGGML_BACKEND_DL=on'
            '-DGGML_CPU_ALL_VARIANTS=on'
            '-DGGML_RPC=on'
            '-DLLAMA_BUILD_TESTS=off'
            '-DLLAMA_BUILD_EXAMPLES=off'
          )

          # Windows builds additionally select Ninja and enable BoringSSL.
          if ($IsWindows) {
            $cmakeFlags += "-G 'Ninja'"
            $cmakeFlags += '-DLLAMA_BUILD_BORINGSSL=ON'
          }

          $env:CMAKE_ARGS = $cmakeFlags -join ' '
          $env:LLAMA_CPP_BUILD_JOBS = "2"

          # Local version label appended to the wheel version.
          $localLabel = "+cpu"
          python -m build --wheel -C--build-option=egg_info "-C--build-option=--tag-build=$localLabel"

      # The upload is allowed to fail; the next step keeps the wheels as a
      # workflow artifact in that case.
      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          tag: ${{ inputs.version }}
          release_name: ${{ inputs.version }}
          file: ./dist/*.whl
          file_glob: true
          overwrite: true
          make_latest: false

      - uses: actions/upload-artifact@v6
        if: steps.upload-release.outcome == 'failure'
        with:
          name: cpu-${{ runner.os }}
          path: ./dist/*.whl
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# Builds CUDA wheels for each OS / CUDA version in the matrix. Callable
# manually or from another workflow.
#
# `config` overrides matrix axes ("key:v1,v2;key2:v3"); `exclude` adds matrix
# exclusions. The sentinel values "Default" / "None" leave the matrix untouched.
name: Build CUDA Wheels

on:
  workflow_dispatch:
    inputs:
      version:
        description: "Git ref to build (tag recommended for releases, e.g. v0.1.0)"
        type: string
        required: true
      config:
        description: "Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2"
        type: string
        required: false
        default: "Default"
      exclude:
        description: "Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2"
        type: string
        required: false
        default: "None"
  workflow_call:
    inputs:
      version:
        description: "Git ref to build"
        type: string
        required: true
      config:
        description: "Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2"
        type: string
        required: false
        default: "Default"
      exclude:
        description: "Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2"
        type: string
        required: false
        default: "None"

permissions:
  contents: write

jobs:
  # Computes the build matrix as JSON so the inputs can reshape it.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}

    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          # Default axes.
          $matrix = @{
              'os' = @('ubuntu-22.04', 'windows-2022')
              'pyver' = @("3.11")
              'cuda' = @("12.4.1", "13.1.0")
          }

          # Each "key:v1,v2" entry replaces the whole axis named `key`.
          if ($env:CONFIGIN -ne 'Default') {
              foreach ($override in $env:CONFIGIN.Split(';')) {
                  $pair = $override.Split(':')
                  $matrix[$pair[0]] = $pair[1].Split(',')
              }
          }

          # Each ";"-separated group becomes one exclusion object.
          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: ${{ matrix.os }} CUDA ${{ matrix.cuda }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      CUDAVER: ${{ matrix.cuda }}

    steps:
      - name: Free Disk Space
        if: runner.os == 'Linux'
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          tool-cache: true
          android: true
          dotnet: true
          haskell: true
          large-packages: false
          swap-storage: true

      - uses: actions/checkout@v6
        with:
          repository: "vladlearns/llama-cpp-bin"
          ref: ${{ inputs.version }}
          submodules: "recursive"

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2.20
        with:
          key: cuda-${{ matrix.cuda }}-${{ matrix.os }}
          variant: ccache
          evict-old-files: 14d

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}

      - name: Install CUDA Toolkit
        id: cuda-toolkit
        uses: Jimver/cuda-toolkit@v0.2.30
        with:
          cuda: ${{ matrix.cuda }}
          method: "network"

      # Re-export the toolkit location so the following steps see it.
      - name: Setup CUDA Environment
        run: |
          echo "CUDA_PATH=$env:CUDA_PATH" >> $env:GITHUB_ENV
          echo "CUDA_HOME=$env:CUDA_PATH" >> $env:GITHUB_ENV
          if ($IsLinux) {
            echo "LD_LIBRARY_PATH=$env:CUDA_PATH/lib64:$env:LD_LIBRARY_PATH" >> $env:GITHUB_ENV
          }

      - name: Verify CUDA
        run: nvcc --version

      - name: Install Ninja
        if: runner.os == 'Windows'
        run: choco install ninja

      - name: Setup MSVC
        if: runner.os == 'Windows'
        uses: ilammy/msvc-dev-cmd@v1

      - name: Install Dependencies
        run: |
          if ($IsLinux) {
            sudo apt-get update
            sudo apt-get install -y build-essential libssl-dev
          }
          python -m pip install build

      - name: Build Wheel
        run: |
          # "12.4.1" -> "124": drop the last version component, then the dots.
          $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')

          $env:VERBOSE = '1'

          # Assemble the CMake flags as a list, then join them with spaces.
          $cmakeFlags = @(
            '-DCMAKE_BUILD_TYPE=Release'
            '-DGGML_NATIVE=off'
            '-DGGML_BACKEND_DL=on'
            '-DGGML_CPU_ALL_VARIANTS=on'
            '-DGGML_RPC=on'
            '-DLLAMA_BUILD_TESTS=off'
            '-DLLAMA_BUILD_EXAMPLES=off'
            '-DGGML_CUDA=on'
            '-DGGML_CUDA_CUB_3DOT2=on'
          )

          # Windows builds additionally select Ninja and enable BoringSSL.
          if ($IsWindows) {
            $cmakeFlags += "-G 'Ninja'"
            $cmakeFlags += '-DLLAMA_BUILD_BORINGSSL=ON'
          }

          $env:CMAKE_ARGS = $cmakeFlags -join ' '
          $env:LLAMA_CPP_BUILD_JOBS = "2"

          # Copy CUDA libraries into the package's bin directory before the
          # wheel is built. A missing library only produces a warning.
          $binDir = "src/llama_cpp_bin/bin"
          New-Item -ItemType Directory -Force -Path $binDir | Out-Null

          $cudaMajor = $env:CUDAVER.Split('.')[0]

          if ($IsLinux) {
            $wanted = @("libcudart.so.$cudaMajor", "libcublas.so.$cudaMajor", "libcublasLt.so.$cudaMajor")

            foreach ($lib in $wanted) {
              # First entry whose name starts with the wanted soname.
              $match = Get-ChildItem -Path "$env:CUDA_PATH/lib64" -Filter "$lib*" -File | Select-Object -First 1
              if (-not $match) {
                Write-Warning "Could not find $lib"
                continue
              }
              # cp -L dereferences symlinks so a regular file is copied.
              bash -c "cp -L '$($match.FullName)' '$binDir/$lib'"
              Write-Host "Copied $($match.FullName) to $binDir/$lib"
            }
          } else {
            $wanted = @("cublas64_$cudaMajor.dll", "cudart64_$cudaMajor.dll", "cublasLt64_$cudaMajor.dll")

            foreach ($lib in $wanted) {
              $match = Get-ChildItem -Path "$env:CUDA_PATH" -Filter $lib -Recurse -File | Select-Object -First 1
              if (-not $match) {
                Write-Warning "Could not find $lib"
                continue
              }
              Copy-Item -Path $match.FullName -Destination "$binDir/$lib"
              Write-Host "Copied $($match.FullName) to $binDir/$lib"
            }
          }

          # Local version label appended to the wheel version, e.g. "+cu124".
          $localLabel = "+cu$cudaVersion"
          python -m build --wheel -C--build-option=egg_info "-C--build-option=--tag-build=$localLabel"

      # The upload is allowed to fail; the next step keeps the wheels as a
      # workflow artifact in that case.
      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          tag: ${{ inputs.version }}
          release_name: ${{ inputs.version }}
          file: ./dist/*.whl
          file_glob: true
          overwrite: true
          make_latest: false

      - uses: actions/upload-artifact@v6
        if: steps.upload-release.outcome == 'failure'
        with:
          name: cuda-${{ matrix.cuda }}-${{ runner.os }}
          path: ./dist/*.whl
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Builds macOS wheels: an arm64 build with Metal enabled and an x86_64 build
# with Metal disabled. Callable manually or from another workflow.
name: Build macOS Wheels

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Git ref to build (tag recommended for releases, e.g. v0.1.0)'
        required: true
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Git ref to build'
        required: true
        type: string

permissions:
  contents: write

jobs:
  build_wheels_arm64:
    name: macOS ARM64 (Metal)
    runs-on: macos-14

    steps:
      - uses: actions/checkout@v6
        with:
          repository: 'vladlearns/llama-cpp-bin'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"

      - name: Install Dependencies
        run: |
          python -m pip install build cmake

      - name: Build Wheel
        run: |
          export CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=off -DGGML_BACKEND_DL=on -DGGML_METAL=on -DGGML_METAL_USE_BF16=on -DGGML_METAL_EMBED_LIBRARY=on -DGGML_RPC=on -DLLAMA_BUILD_BORINGSSL=ON -DLLAMA_BUILD_TESTS=off -DLLAMA_BUILD_EXAMPLES=off"
          export MACOSX_DEPLOYMENT_TARGET="13.3"
          export CMAKE_OSX_ARCHITECTURES="arm64"
          export ARCHFLAGS="-arch arm64"

          VERBOSE=1 python -m build --wheel

          # Retag wheels that were labelled universal2, since only arm64 was
          # built. If no such wheel exists the glob stays a literal pattern;
          # skip it so `mv` does not fail the step.
          for file in ./dist/*universal2.whl; do
            [ -e "$file" ] || continue
            mv "$file" "${file/universal2/arm64}"
          done

      # The upload is allowed to fail; the next step keeps the wheels as a
      # workflow artifact in that case.
      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ./dist/*.whl
          tag: ${{ inputs.version }}
          release_name: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true

      - uses: actions/upload-artifact@v6
        if: steps.upload-release.outcome == 'failure'
        with:
          name: macos-arm64
          path: ./dist/*.whl

  build_wheels_x64:
    name: macOS x64
    runs-on: macos-15-intel

    steps:
      - uses: actions/checkout@v6
        with:
          repository: 'vladlearns/llama-cpp-bin'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"

      - name: Install Dependencies
        run: |
          python -m pip install build cmake

      - name: Build Wheel
        run: |
          export CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=off -DGGML_BACKEND_DL=on -DGGML_CPU_ALL_VARIANTS=on -DGGML_METAL=off -DGGML_RPC=on -DLLAMA_BUILD_BORINGSSL=ON -DLLAMA_BUILD_TESTS=off -DLLAMA_BUILD_EXAMPLES=off"
          export MACOSX_DEPLOYMENT_TARGET="13.3"
          export CMAKE_OSX_ARCHITECTURES="x86_64"
          export ARCHFLAGS="-arch x86_64"

          VERBOSE=1 python -m build --wheel

          # Retag wheels that were labelled universal2, since only x86_64 was
          # built. If no such wheel exists the glob stays a literal pattern;
          # skip it so `mv` does not fail the step.
          for file in ./dist/*universal2.whl; do
            [ -e "$file" ] || continue
            mv "$file" "${file/universal2/x86_64}"
          done

      # The upload is allowed to fail; the next step keeps the wheels as a
      # workflow artifact in that case.
      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ./dist/*.whl
          tag: ${{ inputs.version }}
          release_name: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true

      - uses: actions/upload-artifact@v6
        if: steps.upload-release.outcome == 'failure'
        with:
          name: macos-x64
          path: ./dist/*.whl
|