@fugood/llama.node 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +5 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +8 -1
- package/package.json +1 -1
- package/patches/llama.patch +12 -12
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/LlamaContext.cpp +33 -1
- package/src/LlamaContext.h +1 -0
- package/src/LoadSessionWorker.cpp +1 -0
- package/src/llama.cpp/.github/workflows/bench.yml +310 -0
- package/src/llama.cpp/.github/workflows/build.yml +1315 -0
- package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
- package/src/llama.cpp/.github/workflows/docker.yml +116 -0
- package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
- package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
- package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
- package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
- package/src/llama.cpp/.github/workflows/server.yml +183 -0
- package/src/llama.cpp/CMakeLists.txt +91 -1245
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
- package/src/llama.cpp/cmake/build-info.cmake +58 -0
- package/src/llama.cpp/cmake/git-vars.cmake +22 -0
- package/src/llama.cpp/common/CMakeLists.txt +4 -3
- package/src/llama.cpp/common/build-info.cpp.in +4 -0
- package/src/llama.cpp/common/common.cpp +1116 -877
- package/src/llama.cpp/common/common.h +191 -77
- package/src/llama.cpp/common/grammar-parser.cpp +118 -31
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
- package/src/llama.cpp/common/log.h +1 -1
- package/src/llama.cpp/common/ngram-cache.h +10 -3
- package/src/llama.cpp/common/sampling.cpp +19 -10
- package/src/llama.cpp/docs/build.md +353 -0
- package/src/llama.cpp/examples/CMakeLists.txt +22 -22
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +52 -55
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/chat-13B.bat +57 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
- package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
- package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
- package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +38 -153
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
- package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
- package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
- package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
- package/src/llama.cpp/examples/llava/clip.cpp +23 -14
- package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
- package/src/llama.cpp/examples/llava/requirements.txt +3 -2
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +98 -75
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
- package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
- package/src/llama.cpp/examples/server/server.cpp +274 -671
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +28 -29
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +21 -29
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +23 -0
- package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
- package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
- package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
- package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
- package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
- package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
- package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
- package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
- package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
- package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
- package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
- package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
- package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
- package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
- package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
- package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
- package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
- package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
- package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
- package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
- package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
- package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
- package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
- package/src/llama.cpp/models/.editorconfig +1 -0
- package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/requirements/requirements-all.txt +12 -0
- package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
- package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
- package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
- package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
- package/src/llama.cpp/requirements.txt +5 -4
- package/src/llama.cpp/scripts/build-info.sh +30 -0
- package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
- package/src/llama.cpp/src/CMakeLists.txt +33 -0
- package/src/llama.cpp/src/llama-grammar.cpp +539 -0
- package/src/llama.cpp/src/llama-grammar.h +39 -0
- package/src/llama.cpp/src/llama-impl.h +26 -0
- package/src/llama.cpp/src/llama-sampling.cpp +635 -0
- package/src/llama.cpp/src/llama-sampling.h +56 -0
- package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
- package/src/llama.cpp/src/llama-vocab.h +130 -0
- package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
- package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
- package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
- package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
- package/src/llama.cpp/tests/CMakeLists.txt +19 -20
- package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
- package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
- package/src/llama.cpp/tests/test-double-float.cpp +2 -2
- package/src/llama.cpp/tests/test-grad0.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
- package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
- package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
- package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
- package/src/llama.cpp/tests/test-rope.cpp +3 -4
- package/src/llama.cpp/tests/test-sampling.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
- package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
- package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
- package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
- package/src/llama.cpp/ggml-opencl.cpp +0 -2305
- package/src/llama.cpp/ggml-opencl.h +0 -36
- package/src/llama.cpp/ggml-sycl.cpp +0 -17340
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
- package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
- /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
- /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
- /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
- /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
- /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
|
@@ -79,22 +79,22 @@ endmacro()
|
|
|
79
79
|
# flags are for MSVC only!
|
|
80
80
|
check_sse("AVX" " ;/arch:AVX")
|
|
81
81
|
if (NOT ${AVX_FOUND})
|
|
82
|
-
set(
|
|
82
|
+
set(GGML_AVX OFF)
|
|
83
83
|
else()
|
|
84
|
-
set(
|
|
84
|
+
set(GGML_AVX ON)
|
|
85
85
|
endif()
|
|
86
86
|
|
|
87
87
|
check_sse("AVX2" " ;/arch:AVX2")
|
|
88
88
|
check_sse("FMA" " ;/arch:AVX2")
|
|
89
89
|
if ((NOT ${AVX2_FOUND}) OR (NOT ${FMA_FOUND}))
|
|
90
|
-
set(
|
|
90
|
+
set(GGML_AVX2 OFF)
|
|
91
91
|
else()
|
|
92
|
-
set(
|
|
92
|
+
set(GGML_AVX2 ON)
|
|
93
93
|
endif()
|
|
94
94
|
|
|
95
95
|
check_sse("AVX512" " ;/arch:AVX512")
|
|
96
96
|
if (NOT ${AVX512_FOUND})
|
|
97
|
-
set(
|
|
97
|
+
set(GGML_AVX512 OFF)
|
|
98
98
|
else()
|
|
99
|
-
set(
|
|
99
|
+
set(GGML_AVX512 ON)
|
|
100
100
|
endif()
|
|
@@ -23,28 +23,29 @@ extern "C" {
|
|
|
23
23
|
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
|
24
24
|
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
|
25
25
|
GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
|
|
26
|
-
GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
|
|
27
26
|
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
|
|
28
27
|
|
|
29
28
|
// buffer
|
|
30
29
|
enum ggml_backend_buffer_usage {
|
|
31
30
|
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
|
|
32
31
|
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
|
|
32
|
+
GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
|
|
33
33
|
};
|
|
34
34
|
|
|
35
|
-
GGML_API const char *
|
|
36
|
-
GGML_API void
|
|
37
|
-
GGML_API void *
|
|
38
|
-
GGML_API size_t
|
|
39
|
-
GGML_API GGML_CALL void
|
|
40
|
-
GGML_API size_t
|
|
41
|
-
GGML_API size_t
|
|
42
|
-
GGML_API size_t
|
|
43
|
-
GGML_API void
|
|
44
|
-
GGML_API bool
|
|
45
|
-
GGML_API void
|
|
46
|
-
GGML_API
|
|
47
|
-
GGML_API
|
|
35
|
+
GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
|
|
36
|
+
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
|
37
|
+
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
|
38
|
+
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
|
39
|
+
GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
40
|
+
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
|
41
|
+
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
|
42
|
+
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
43
|
+
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
|
|
44
|
+
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
|
|
45
|
+
GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
|
46
|
+
GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer);
|
|
47
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
|
|
48
|
+
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
|
|
48
49
|
|
|
49
50
|
//
|
|
50
51
|
// Backend
|
|
@@ -74,6 +75,7 @@ extern "C" {
|
|
|
74
75
|
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
|
75
76
|
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
|
76
77
|
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
|
78
|
+
GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
|
|
77
79
|
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
|
78
80
|
|
|
79
81
|
// tensor copy between different backends
|
|
@@ -90,7 +92,7 @@ extern "C" {
|
|
|
90
92
|
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
|
|
91
93
|
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
|
|
92
94
|
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
|
93
|
-
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
|
|
95
|
+
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
|
|
94
96
|
|
|
95
97
|
//
|
|
96
98
|
// CPU backend
|
|
@@ -119,7 +121,7 @@ extern "C" {
|
|
|
119
121
|
|
|
120
122
|
GGML_API size_t ggml_backend_reg_get_count(void);
|
|
121
123
|
GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
|
|
122
|
-
GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is
|
|
124
|
+
GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is backend_name:params (params is optional)
|
|
123
125
|
GGML_API const char * ggml_backend_reg_get_name(size_t i);
|
|
124
126
|
GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
|
|
125
127
|
GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
|
|
@@ -182,6 +184,9 @@ extern "C" {
|
|
|
182
184
|
// Initialize backend buffers from a measure graph
|
|
183
185
|
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
|
184
186
|
|
|
187
|
+
GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
|
|
188
|
+
GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
|
|
189
|
+
|
|
185
190
|
// Get the number of splits of the last graph
|
|
186
191
|
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
|
187
192
|
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
|
|
@@ -225,7 +230,7 @@ extern "C" {
|
|
|
225
230
|
|
|
226
231
|
// Tensor initialization
|
|
227
232
|
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
|
228
|
-
GGML_API void ggml_backend_view_init(
|
|
233
|
+
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
|
|
229
234
|
|
|
230
235
|
|
|
231
236
|
#ifdef __cplusplus
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "ggml.h"
|
|
4
|
+
#include "ggml-backend.h"
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
#ifdef __cplusplus
|
|
8
|
+
extern "C" {
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
// backend API
|
|
12
|
+
GGML_API GGML_CALL ggml_backend_t ggml_backend_blas_init(void);
|
|
13
|
+
|
|
14
|
+
GGML_API GGML_CALL bool ggml_backend_is_blas(ggml_backend_t backend);
|
|
15
|
+
|
|
16
|
+
// number of threads used for conversion to float
|
|
17
|
+
// for openblas and blis, this will also set the number of threads used for blas operations
|
|
18
|
+
GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
#ifdef __cplusplus
|
|
22
|
+
}
|
|
23
|
+
#endif
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) 2023-2024 The ggml authors
|
|
3
|
+
*
|
|
4
|
+
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
5
|
+
* of this software and associated documentation files (the "Software"), to
|
|
6
|
+
* deal in the Software without restriction, including without limitation the
|
|
7
|
+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
8
|
+
* sell copies of the Software, and to permit persons to whom the Software is
|
|
9
|
+
* furnished to do so, subject to the following conditions:
|
|
10
|
+
*
|
|
11
|
+
* The above copyright notice and this permission notice shall be included in
|
|
12
|
+
* all copies or substantial portions of the Software.
|
|
13
|
+
*
|
|
14
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
15
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
16
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
17
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
19
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
20
|
+
* IN THE SOFTWARE.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
#pragma once
|
|
24
|
+
|
|
25
|
+
#include "ggml-backend.h"
|
|
26
|
+
#include "ggml.h"
|
|
27
|
+
|
|
28
|
+
#ifdef __cplusplus
|
|
29
|
+
extern "C" {
|
|
30
|
+
#endif
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* @brief Maximum number of CANN devices supported.
|
|
34
|
+
*/
|
|
35
|
+
#define GGML_CANN_MAX_DEVICES 16
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* @brief Initializes the CANN backend for a specified device.
|
|
39
|
+
*
|
|
40
|
+
* This function initializes the CANN backend for the given device.
|
|
41
|
+
* It verifies the device index, allocates a context, and creates a backend
|
|
42
|
+
* instance.
|
|
43
|
+
*
|
|
44
|
+
* @param device The index of the device to initialize.
|
|
45
|
+
* @return A pointer to the initialized backend instance, or nullptr on failure.
|
|
46
|
+
*/
|
|
47
|
+
GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* @brief Checks if a given backend is a CANN backend.
|
|
51
|
+
*
|
|
52
|
+
* This function verifies if the provided backend is a CANN backend by comparing
|
|
53
|
+
* its GUID with the CANN backend's GUID.
|
|
54
|
+
*
|
|
55
|
+
* @param backend The backend instance to check.
|
|
56
|
+
* @return True if the backend is a CANN backend, false otherwise.
|
|
57
|
+
*/
|
|
58
|
+
GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* @brief Retrieves the CANN buffer type for a specified device.
|
|
62
|
+
*
|
|
63
|
+
* This function initializes and returns the buffer type interface associated
|
|
64
|
+
* with the given device. It ensures thread-safe access using a mutex.
|
|
65
|
+
*
|
|
66
|
+
* @param device The device index for which to retrieve the buffer type.
|
|
67
|
+
* @return A pointer to the buffer type interface for the specified device, or
|
|
68
|
+
* nullptr if the device index is out of range.
|
|
69
|
+
*/
|
|
70
|
+
GGML_API GGML_CALL ggml_backend_buffer_type_t
|
|
71
|
+
ggml_backend_cann_buffer_type(int32_t device);
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* @brief Retrieves the number of CANN devices available.
|
|
75
|
+
*
|
|
76
|
+
* This function returns the number of CANN devices available based on
|
|
77
|
+
* information obtained from `ggml_cann_info()`.
|
|
78
|
+
*
|
|
79
|
+
* @return The number of CANN devices available.
|
|
80
|
+
*/
|
|
81
|
+
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* @brief Retrieves the description of a specific CANN device.
|
|
85
|
+
*
|
|
86
|
+
* This function sets the specified device, retrieves the SoC name,
|
|
87
|
+
* and writes it into the provided description buffer.
|
|
88
|
+
*
|
|
89
|
+
* @param device The device index to retrieve the description for.
|
|
90
|
+
* @param description Pointer to a buffer where the description will be written.
|
|
91
|
+
* @param description_size Size of the description buffer.
|
|
92
|
+
*/
|
|
93
|
+
GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
|
|
94
|
+
int32_t device, char* description, size_t description_size);
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* @brief Retrieves the memory information of a specific CANN device.
|
|
98
|
+
*
|
|
99
|
+
* This function sets the specified device, retrieves the free and total
|
|
100
|
+
* memory information of the specified type (ACL_HBM_MEM), and stores them
|
|
101
|
+
* in the provided pointers.
|
|
102
|
+
*
|
|
103
|
+
* @param device The device index to retrieve memory information for.
|
|
104
|
+
* @param free Pointer to a variable where the free memory size will be stored.
|
|
105
|
+
* @param total Pointer to a variable where the total memory size will be
|
|
106
|
+
* stored.
|
|
107
|
+
*/
|
|
108
|
+
GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
|
|
109
|
+
size_t* free,
|
|
110
|
+
size_t* total);
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* @brief Set the logging callback for GGML.
|
|
114
|
+
*
|
|
115
|
+
* This function sets the logging callback and user data for logging.
|
|
116
|
+
*
|
|
117
|
+
* @param log_callback The logging callback to set.
|
|
118
|
+
* @param user_data User data to pass to the logging callback.
|
|
119
|
+
*/
|
|
120
|
+
GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
|
|
121
|
+
void* user_data);
|
|
122
|
+
|
|
123
|
+
#ifdef __cplusplus
|
|
124
|
+
}
|
|
125
|
+
#endif
|
|
@@ -6,6 +6,9 @@
|
|
|
6
6
|
#ifdef GGML_USE_HIPBLAS
|
|
7
7
|
#define GGML_CUDA_NAME "ROCm"
|
|
8
8
|
#define GGML_CUBLAS_NAME "hipBLAS"
|
|
9
|
+
#elif defined(GGML_USE_MUSA)
|
|
10
|
+
#define GGML_CUDA_NAME "MUSA"
|
|
11
|
+
#define GGML_CUBLAS_NAME "muBLAS"
|
|
9
12
|
#else
|
|
10
13
|
#define GGML_CUDA_NAME "CUDA"
|
|
11
14
|
#define GGML_CUBLAS_NAME "cuBLAS"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// An interface allowing to compute ggml_cgraph with Metal
|
|
2
2
|
//
|
|
3
3
|
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
|
4
|
-
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA,
|
|
4
|
+
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
|
|
5
5
|
//
|
|
6
6
|
// How it works?
|
|
7
7
|
//
|
|
@@ -63,4 +63,3 @@ GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
|
|
|
63
63
|
#ifdef __cplusplus
|
|
64
64
|
}
|
|
65
65
|
#endif
|
|
66
|
-
|
|
@@ -9,13 +9,13 @@
|
|
|
9
9
|
#include "ggml.h"
|
|
10
10
|
#include "ggml-backend.h"
|
|
11
11
|
|
|
12
|
+
#define GGML_SYCL_NAME "SYCL"
|
|
13
|
+
#define GGML_SYCL_MAX_DEVICES 48
|
|
14
|
+
|
|
12
15
|
#ifdef __cplusplus
|
|
13
16
|
extern "C" {
|
|
14
17
|
#endif
|
|
15
18
|
|
|
16
|
-
#define GGML_SYCL_MAX_DEVICES 48
|
|
17
|
-
#define GGML_SYCL_NAME "SYCL"
|
|
18
|
-
|
|
19
19
|
// backend API
|
|
20
20
|
GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
|
|
21
21
|
|
|
@@ -33,13 +33,6 @@ GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
|
|
|
33
33
|
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
|
|
34
34
|
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
|
|
35
35
|
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
|
36
|
-
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
|
|
37
|
-
|
|
38
|
-
// TODO: these are temporary
|
|
39
|
-
// ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
|
|
40
|
-
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
|
|
41
|
-
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
|
|
42
|
-
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
|
|
43
36
|
|
|
44
37
|
// SYCL doesn't support registering host memory, keep here for reference
|
|
45
38
|
// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|