@fugood/llama.node 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +5 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +8 -1
- package/package.json +1 -1
- package/patches/llama.patch +12 -12
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/LlamaContext.cpp +33 -1
- package/src/LlamaContext.h +1 -0
- package/src/LoadSessionWorker.cpp +1 -0
- package/src/llama.cpp/.github/workflows/bench.yml +310 -0
- package/src/llama.cpp/.github/workflows/build.yml +1315 -0
- package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
- package/src/llama.cpp/.github/workflows/docker.yml +116 -0
- package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
- package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
- package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
- package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
- package/src/llama.cpp/.github/workflows/server.yml +183 -0
- package/src/llama.cpp/CMakeLists.txt +91 -1245
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
- package/src/llama.cpp/cmake/build-info.cmake +58 -0
- package/src/llama.cpp/cmake/git-vars.cmake +22 -0
- package/src/llama.cpp/common/CMakeLists.txt +4 -3
- package/src/llama.cpp/common/build-info.cpp.in +4 -0
- package/src/llama.cpp/common/common.cpp +1116 -877
- package/src/llama.cpp/common/common.h +191 -77
- package/src/llama.cpp/common/grammar-parser.cpp +118 -31
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
- package/src/llama.cpp/common/log.h +1 -1
- package/src/llama.cpp/common/ngram-cache.h +10 -3
- package/src/llama.cpp/common/sampling.cpp +19 -10
- package/src/llama.cpp/docs/build.md +353 -0
- package/src/llama.cpp/examples/CMakeLists.txt +22 -22
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +52 -55
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/chat-13B.bat +57 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
- package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
- package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
- package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +38 -153
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
- package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
- package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
- package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
- package/src/llama.cpp/examples/llava/clip.cpp +23 -14
- package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
- package/src/llama.cpp/examples/llava/requirements.txt +3 -2
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +98 -75
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
- package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
- package/src/llama.cpp/examples/server/server.cpp +274 -671
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +28 -29
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +21 -29
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +23 -0
- package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
- package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
- package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
- package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
- package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
- package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
- package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
- package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
- package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
- package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
- package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
- package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
- package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
- package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
- package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
- package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
- package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
- package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
- package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
- package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
- package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
- package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
- package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
- package/src/llama.cpp/models/.editorconfig +1 -0
- package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/requirements/requirements-all.txt +12 -0
- package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
- package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
- package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
- package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
- package/src/llama.cpp/requirements.txt +5 -4
- package/src/llama.cpp/scripts/build-info.sh +30 -0
- package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
- package/src/llama.cpp/src/CMakeLists.txt +33 -0
- package/src/llama.cpp/src/llama-grammar.cpp +539 -0
- package/src/llama.cpp/src/llama-grammar.h +39 -0
- package/src/llama.cpp/src/llama-impl.h +26 -0
- package/src/llama.cpp/src/llama-sampling.cpp +635 -0
- package/src/llama.cpp/src/llama-sampling.h +56 -0
- package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
- package/src/llama.cpp/src/llama-vocab.h +130 -0
- package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
- package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
- package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
- package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
- package/src/llama.cpp/tests/CMakeLists.txt +19 -20
- package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
- package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
- package/src/llama.cpp/tests/test-double-float.cpp +2 -2
- package/src/llama.cpp/tests/test-grad0.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
- package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
- package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
- package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
- package/src/llama.cpp/tests/test-rope.cpp +3 -4
- package/src/llama.cpp/tests/test-sampling.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
- package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
- package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
- package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
- package/src/llama.cpp/ggml-opencl.cpp +0 -2305
- package/src/llama.cpp/ggml-opencl.h +0 -36
- package/src/llama.cpp/ggml-sycl.cpp +0 -17340
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
- package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
- /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
- /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
- /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
- /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
- /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp
@@ -154,7 +154,7 @@ static void test_roundtrip_on_chunk(
     }
 
     if (use_reference) {
-        qfns.
+        qfns.from_float_ref(input_scratch, quantized_scratch, chunk_size);
     } else {
         qfns.from_float(input_scratch, quantized_scratch, chunk_size);
     }
package/src/llama.cpp/examples/retrieval/retrieval.cpp
@@ -4,72 +4,12 @@
 #include <algorithm>
 #include <fstream>
 
-
-
-    int32_t chunk_size = 64; // chunk size for context embedding
-    std::string chunk_separator = "\n"; // chunk separator for context embedding
-};
-
-static void retrieval_params_print_usage(int argc, char ** argv, gpt_params & gpt_params, retrieval_params & params) {
-    gpt_params_print_usage(argc, argv, gpt_params);
-    printf("retrieval options:\n");
-    printf(" --context-file FNAME file containing context to embed.\n");
-    printf(" specify multiple files by providing --context-file option multiple times.\n");
-    printf(" --chunk-size N minimum length of embedded text chunk (default:%d)\n", params.chunk_size);
-    printf(" --chunk-separator STRING\n");
-    printf(" string to separate chunks (default: \"\\n\")\n");
-    printf("\n");
-}
+static void print_usage(int argc, char ** argv, const gpt_params & params) {
+    gpt_params_print_usage(argc, argv, params);
 
-
-
-
-    while (i < argc) {
-        arg = argv[i];
-        bool invalid_gpt_param = false;
-        if(gpt_params_find_arg(argc, argv, argv[i], gpt_params, i, invalid_gpt_param)) {
-            if (invalid_gpt_param) {
-                fprintf(stderr, "error: invalid argument: %s\n", arg.c_str());
-                retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-                exit(1);
-            }
-            // option was parsed by gpt_params_find_arg
-        } else if (arg == "--context-file") {
-            if (++i >= argc) {
-                fprintf(stderr, "error: missing argument for --context-file\n");
-                retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-                exit(1);
-            }
-            std::ifstream file(argv[i]);
-            if (!file) {
-                fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
-                retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-                exit(1);
-            }
-            // store the external file name in params
-            retrieval_params.context_files.push_back(argv[i]);
-        } else if (arg == "--chunk-size") {
-            if (++i >= argc) {
-                fprintf(stderr, "error: missing argument for --chunk-size\n");
-                retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-                exit(1);
-            }
-            retrieval_params.chunk_size = std::stoi(argv[i]);
-        } else if (arg == "--chunk-separator") {
-            if (++i >= argc) {
-                fprintf(stderr, "error: missing argument for --chunk-separator\n");
-                retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-                exit(1);
-            }
-            retrieval_params.chunk_separator = argv[i];
-        } else {
-            // unknown argument
-            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-            retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-            exit(1);
-        }
-        i++;
-    }
+    LOG_TEE("\nexample usage:\n");
+    LOG_TEE("\n %s --model ./models/bge-base-en-v1.5-f16.gguf --top-k 3 --context-file README.md --context-file License --chunk-size 100 --chunk-separator .\n", argv[0]);
+    LOG_TEE("\n");
 }
 
 struct chunk {
@@ -133,9 +73,10 @@ static std::vector<chunk> chunk_file(const std::string & filename, int chunk_siz
     return chunks;
 }
 
-static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens,
-
-
+static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens, llama_seq_id seq_id) {
+    size_t n_tokens = tokens.size();
+    for (size_t i = 0; i < n_tokens; i++) {
+        llama_batch_add(batch, tokens[i], i, { seq_id }, true);
     }
 }
 
@@ -171,33 +112,35 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 
 int main(int argc, char ** argv) {
     gpt_params params;
-    retrieval_params retrieval_params;
 
-
+    if (!gpt_params_parse(argc, argv, params)) {
+        print_usage(argc, argv, params);
+        return 1;
+    }
 
     // For BERT models, batch size must be equal to ubatch size
     params.n_ubatch = params.n_batch;
+    params.embedding = true;
 
-    if (
+    if (params.chunk_size <= 0) {
         fprintf(stderr, "chunk_size must be positive\n");
         return 1;
     }
-    if (
+    if (params.context_files.empty()) {
         fprintf(stderr, "context_files must be specified\n");
         return 1;
     }
-    params.embedding = true;
 
     print_build_info();
 
     printf("processing files:\n");
-    for (auto & context_file :
+    for (auto & context_file : params.context_files) {
         printf("%s\n", context_file.c_str());
     }
 
     std::vector<chunk> chunks;
-    for (auto & context_file :
-        std::vector<chunk> file_chunk = chunk_file(context_file,
+    for (auto & context_file : params.context_files) {
+        std::vector<chunk> file_chunk = chunk_file(context_file, params.chunk_size, params.chunk_separator);
         chunks.insert(chunks.end(), file_chunk.begin(), file_chunk.end());
     }
     printf("Number of chunks: %ld\n", chunks.size());
@@ -218,6 +161,12 @@ int main(int argc, char ** argv) {
     const int n_ctx_train = llama_n_ctx_train(model);
     const int n_ctx = llama_n_ctx(ctx);
 
+    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
+        fprintf(stderr, "%s: error: pooling type NONE not supported\n", __func__);
+        return 1;
+    }
+
     if (n_ctx > n_ctx_train) {
         fprintf(stderr, "%s: warning: model was trained on only %d context tokens (%d specified)\n",
                 __func__, n_ctx_train, n_ctx);
@@ -242,7 +191,7 @@ int main(int argc, char ** argv) {
         return 1;
     }
     // add eos if not present
-    if (inp.empty() || inp.back() != llama_token_eos(model)) {
+    if (llama_token_eos(model) >= 0 && (inp.empty() || inp.back() != llama_token_eos(model))) {
        inp.push_back(llama_token_eos(model));
     }
     chunk.tokens = inp;
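
For reference, a minimal sketch of the retrieval example's new argument-handling and batching flow. It is not a verbatim copy of the shipped example and assumes only the names visible in the hunks above (gpt_params_parse, llama_batch_add, and the params.context_files / params.chunk_size / params.chunk_separator fields that replace the removed retrieval_params struct):

// Sketch only: shows how retrieval now leans on common's gpt_params and
// llama_batch_add instead of its own retrieval_params parser.
#include "common.h"
#include "llama.h"

#include <vector>

static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens, llama_seq_id seq_id) {
    for (size_t i = 0; i < tokens.size(); i++) {
        // one sequence id per chunk; logits requested so the embedding can be read back
        llama_batch_add(batch, tokens[i], i, { seq_id }, true);
    }
}

int main(int argc, char ** argv) {
    gpt_params params;
    if (!gpt_params_parse(argc, argv, params)) {
        return 1;                      // the real example also calls print_usage() here
    }
    params.embedding = true;           // retrieval needs embeddings
    params.n_ubatch  = params.n_batch; // BERT-style encoders require n_ubatch == n_batch
    // --context-file / --chunk-size / --chunk-separator are now ordinary gpt_params
    // fields (params.context_files, params.chunk_size, params.chunk_separator).
    return 0;
}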
package/src/llama.cpp/examples/save-load-state/save-load-state.cpp
@@ -11,6 +11,7 @@ int main(int argc, char ** argv) {
     params.prompt = "The quick brown fox";
 
     if (!gpt_params_parse(argc, argv, params)) {
+        gpt_params_print_usage(argc, argv, params);
         return 1;
     }
 
@@ -46,7 +47,7 @@ int main(int argc, char ** argv) {
     // save state (rng, logits, embedding and kv_cache) to file
     {
         std::vector<uint8_t> state_mem(llama_state_get_size(ctx));
-        const size_t written = llama_state_get_data(ctx, state_mem.data());
+        const size_t written = llama_state_get_data(ctx, state_mem.data(), state_mem.size());
 
         FILE *fp_write = fopen("dump_state.bin", "wb");
         fwrite(state_mem.data(), 1, written, fp_write);
@@ -98,13 +99,16 @@ int main(int argc, char ** argv) {
 
     // load state (rng, logits, embedding and kv_cache) from file
     {
-        std::vector<uint8_t> state_mem
+        std::vector<uint8_t> state_mem;
 
         FILE * fp_read = fopen("dump_state.bin", "rb");
+        fseek(fp_read, 0, SEEK_END);
+        state_mem.resize(ftell(fp_read));
+        fseek(fp_read, 0, SEEK_SET);
         const size_t read = fread(state_mem.data(), 1, state_mem.size(), fp_read);
         fclose(fp_read);
 
-        if (read != llama_state_set_data(ctx2, state_mem.data())) {
+        if (read != llama_state_set_data(ctx2, state_mem.data(), state_mem.size())) {
             fprintf(stderr, "\n%s : failed to read state\n", __func__);
             llama_free(ctx2);
             llama_free_model(model);
@@ -158,13 +162,16 @@ int main(int argc, char ** argv) {
 
     // load state (rng, logits, embedding and kv_cache) from file
     {
-        std::vector<uint8_t> state_mem
+        std::vector<uint8_t> state_mem;
 
         FILE * fp_read = fopen("dump_state.bin", "rb");
+        fseek(fp_read, 0, SEEK_END);
+        state_mem.resize(ftell(fp_read));
+        fseek(fp_read, 0, SEEK_SET);
         const size_t read = fread(state_mem.data(), 1, state_mem.size(), fp_read);
         fclose(fp_read);
 
-        if (read != llama_state_set_data(ctx3, state_mem.data())) {
+        if (read != llama_state_set_data(ctx3, state_mem.data(), state_mem.size())) {
             fprintf(stderr, "\n%s : failed to read state\n", __func__);
             llama_free(ctx3);
             llama_free_model(model);
@@ -181,7 +188,7 @@ int main(int argc, char ** argv) {
     {
         // save kv of seq 0
         std::vector<uint8_t> seq_store(llama_state_seq_get_size(ctx3, 0));
-        const size_t ncopy = llama_state_seq_get_data(ctx3, seq_store.data(), 0);
+        const size_t ncopy = llama_state_seq_get_data(ctx3, seq_store.data(), seq_store.size(), 0);
         if (ncopy != seq_store.size()) {
             fprintf(stderr, "\n%s : seq copy data length %zd does not match expected length %zd\n", __func__, ncopy, seq_store.size());
             llama_free(ctx3);
@@ -195,7 +202,7 @@ int main(int argc, char ** argv) {
         fprintf(stderr, "%s : kv cache cleared\n", __func__);
 
         // restore kv into seq 1
-        const size_t nset = llama_state_seq_set_data(ctx3, seq_store.data(), 1);
+        const size_t nset = llama_state_seq_set_data(ctx3, seq_store.data(), seq_store.size(), 1);
         if (nset != seq_store.size()) {
             fprintf(stderr, "\n%s : seq set data length %zd does not match expected length %zd\n", __func__, nset, seq_store.size());
             llama_free(ctx3);
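
The save-load-state hunks above reflect an API change in the bundled llama.cpp: llama_state_get_data, llama_state_set_data and the per-sequence variants now take an explicit buffer size. A minimal sketch of the updated calling pattern, using only the functions visible in the diff (error handling trimmed; a sketch, not the shipped example):

#include <cstdint>
#include <cstdio>
#include <vector>

#include "llama.h"

// Save the full context state to a file, passing the destination size explicitly.
static bool save_state(llama_context * ctx, const char * path) {
    std::vector<uint8_t> state_mem(llama_state_get_size(ctx));
    const size_t written = llama_state_get_data(ctx, state_mem.data(), state_mem.size());

    FILE * fp = fopen(path, "wb");
    if (!fp) return false;
    fwrite(state_mem.data(), 1, written, fp);
    fclose(fp);
    return true;
}

// Restore it, sizing the buffer from the file as the updated example now does.
static bool load_state(llama_context * ctx, const char * path) {
    FILE * fp = fopen(path, "rb");
    if (!fp) return false;

    fseek(fp, 0, SEEK_END);
    std::vector<uint8_t> state_mem(ftell(fp));
    fseek(fp, 0, SEEK_SET);

    const size_t read = fread(state_mem.data(), 1, state_mem.size(), fp);
    fclose(fp);

    return read == llama_state_set_data(ctx, state_mem.data(), state_mem.size());
}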
package/src/llama.cpp/examples/server/CMakeLists.txt
@@ -1,18 +1,37 @@
-set(TARGET server)
+set(TARGET llama-server)
 option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
-option(LLAMA_SERVER_SSL
+option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
+
 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+
+if (MINGW)
+    # fix: https://github.com/ggerganov/llama.cpp/actions/runs/9651004652/job/26617901362?pr=8006
+    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
+endif()
+
 set(TARGET_SRCS
     server.cpp
     utils.hpp
     httplib.h
 )
 set(PUBLIC_ASSETS
+    colorthemes.css
+    style.css
+    theme-beeninorder.css
+    theme-ketivah.css
+    theme-mangotango.css
+    theme-playground.css
+    theme-polarnight.css
+    theme-snowstorm.css
     index.html
+    index-new.html
     index.js
     completion.js
+    system-prompts.js
+    prompt-formats.js
     json-schema-to-grammar.mjs
 )
+
 foreach(asset ${PUBLIC_ASSETS})
     set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
     set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
@@ -23,18 +42,23 @@ foreach(asset ${PUBLIC_ASSETS})
         COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
     )
 endforeach()
+
 add_executable(${TARGET} ${TARGET_SRCS})
 install(TARGETS ${TARGET} RUNTIME)
 target_compile_definitions(${TARGET} PRIVATE
     SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
 )
+
 target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
+
 if (LLAMA_SERVER_SSL)
     find_package(OpenSSL REQUIRED)
     target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
     target_compile_definitions(${TARGET} PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
 endif()
+
 if (WIN32)
     TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
 endif()
+
 target_compile_features(${TARGET} PRIVATE cxx_std_11)