@fugood/llama.node 0.2.3 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -3
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +8 -1
- package/package.json +3 -3
- package/patches/llama.patch +12 -12
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/LlamaContext.cpp +33 -1
- package/src/LlamaContext.h +1 -0
- package/src/llama.cpp/.github/workflows/bench.yml +310 -0
- package/src/llama.cpp/.github/workflows/build.yml +1315 -0
- package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
- package/src/llama.cpp/.github/workflows/docker.yml +116 -0
- package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
- package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
- package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
- package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
- package/src/llama.cpp/.github/workflows/server.yml +183 -0
- package/src/llama.cpp/CMakeLists.txt +91 -1245
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
- package/src/llama.cpp/cmake/build-info.cmake +58 -0
- package/src/llama.cpp/cmake/git-vars.cmake +22 -0
- package/src/llama.cpp/common/CMakeLists.txt +4 -3
- package/src/llama.cpp/common/build-info.cpp.in +4 -0
- package/src/llama.cpp/common/common.cpp +1116 -877
- package/src/llama.cpp/common/common.h +191 -77
- package/src/llama.cpp/common/grammar-parser.cpp +118 -31
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
- package/src/llama.cpp/common/log.h +1 -1
- package/src/llama.cpp/common/ngram-cache.h +10 -3
- package/src/llama.cpp/common/sampling.cpp +19 -10
- package/src/llama.cpp/docs/build.md +353 -0
- package/src/llama.cpp/examples/CMakeLists.txt +22 -22
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +52 -55
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/chat-13B.bat +57 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
- package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
- package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
- package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +38 -153
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
- package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
- package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
- package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
- package/src/llama.cpp/examples/llava/clip.cpp +23 -14
- package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
- package/src/llama.cpp/examples/llava/requirements.txt +3 -2
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +98 -75
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
- package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
- package/src/llama.cpp/examples/server/server.cpp +274 -671
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +28 -29
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +21 -29
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +23 -0
- package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
- package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
- package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
- package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
- package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
- package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
- package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
- package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
- package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
- package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
- package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
- package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
- package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
- package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
- package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
- package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
- package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
- package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
- package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
- package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
- package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
- package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
- package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
- package/src/llama.cpp/models/.editorconfig +1 -0
- package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/requirements/requirements-all.txt +12 -0
- package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
- package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
- package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
- package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
- package/src/llama.cpp/requirements.txt +5 -4
- package/src/llama.cpp/scripts/build-info.sh +30 -0
- package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
- package/src/llama.cpp/src/CMakeLists.txt +33 -0
- package/src/llama.cpp/src/llama-grammar.cpp +539 -0
- package/src/llama.cpp/src/llama-grammar.h +39 -0
- package/src/llama.cpp/src/llama-impl.h +26 -0
- package/src/llama.cpp/src/llama-sampling.cpp +635 -0
- package/src/llama.cpp/src/llama-sampling.h +56 -0
- package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
- package/src/llama.cpp/src/llama-vocab.h +130 -0
- package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
- package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
- package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
- package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
- package/src/llama.cpp/tests/CMakeLists.txt +19 -20
- package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
- package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
- package/src/llama.cpp/tests/test-double-float.cpp +2 -2
- package/src/llama.cpp/tests/test-grad0.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
- package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
- package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
- package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
- package/src/llama.cpp/tests/test-rope.cpp +3 -4
- package/src/llama.cpp/tests/test-sampling.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
- package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
- package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
- package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
- package/src/llama.cpp/ggml-opencl.cpp +0 -2305
- package/src/llama.cpp/ggml-opencl.h +0 -36
- package/src/llama.cpp/ggml-sycl.cpp +0 -17340
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
- package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
- /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
- /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
- /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
- /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
- /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
#if defined(_MSC_VER)
|
|
2
|
+
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
|
|
3
|
+
#endif
|
|
4
|
+
|
|
1
5
|
#include "unicode.h"
|
|
2
6
|
#include "unicode-data.h"
|
|
3
7
|
|
|
@@ -15,6 +19,12 @@
|
|
|
15
19
|
#include <locale>
|
|
16
20
|
#include <codecvt>
|
|
17
21
|
|
|
22
|
+
size_t unicode_len_utf8(char src) {
|
|
23
|
+
const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
|
24
|
+
uint8_t highbits = static_cast<uint8_t>(src) >> 4;
|
|
25
|
+
return lookup[highbits];
|
|
26
|
+
}
|
|
27
|
+
|
|
18
28
|
static std::string unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
|
|
19
29
|
std::string result;
|
|
20
30
|
for (size_t i = 0; i < cps.size(); ++i) {
|
|
@@ -23,7 +33,7 @@ static std::string unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
|
|
|
23
33
|
return result;
|
|
24
34
|
}
|
|
25
35
|
|
|
26
|
-
|
|
36
|
+
uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
|
|
27
37
|
assert(offset < utf8.size());
|
|
28
38
|
if (!(utf8[offset + 0] & 0x80)) {
|
|
29
39
|
auto result = utf8[offset + 0];
|
|
@@ -226,13 +236,13 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|
|
226
236
|
assert(offset_end <= cpts.size());
|
|
227
237
|
start = offset_end;
|
|
228
238
|
|
|
229
|
-
|
|
230
|
-
|
|
239
|
+
static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
|
|
240
|
+
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
|
241
|
+
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
|
|
231
242
|
};
|
|
232
243
|
|
|
233
244
|
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
|
234
|
-
|
|
235
|
-
return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : undef;
|
|
245
|
+
return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{};
|
|
236
246
|
};
|
|
237
247
|
|
|
238
248
|
size_t _prev_end = offset_ini;
|
|
@@ -253,18 +263,18 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|
|
253
263
|
};
|
|
254
264
|
|
|
255
265
|
for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
|
|
256
|
-
const
|
|
266
|
+
const uint32_t cpt = _get_cpt(pos);
|
|
257
267
|
const auto flags = _get_flags(pos);
|
|
258
268
|
|
|
259
269
|
// regex: 's|'t|'re|'ve|'m|'ll|'d
|
|
260
270
|
if (cpt == '\'' && pos+1 < offset_end) {
|
|
261
|
-
|
|
271
|
+
uint32_t cpt_next = _get_cpt(pos+1);
|
|
262
272
|
if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
|
|
263
273
|
pos += _add_token(pos+2);
|
|
264
274
|
continue;
|
|
265
275
|
}
|
|
266
276
|
if (pos+2 < offset_end) {
|
|
267
|
-
|
|
277
|
+
uint32_t cpt_next_next = _get_cpt(pos+2);
|
|
268
278
|
if ((cpt_next == 'r' && cpt_next_next == 'e') ||
|
|
269
279
|
(cpt_next == 'v' && cpt_next_next == 'e') ||
|
|
270
280
|
(cpt_next == 'l' && cpt_next_next == 'l')) {
|
|
@@ -294,9 +304,9 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|
|
294
304
|
continue;
|
|
295
305
|
}
|
|
296
306
|
// regex: <space>?[^\s\p{L}\p{N}]+
|
|
297
|
-
if (!(flags2.is_whitespace
|
|
307
|
+
if (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
|
|
298
308
|
pos += (cpt == ' ');
|
|
299
|
-
while (!(flags2.is_whitespace
|
|
309
|
+
while (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
|
|
300
310
|
flags2 = _get_flags(++pos);
|
|
301
311
|
}
|
|
302
312
|
_add_token(pos);
|
|
@@ -309,7 +319,7 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|
|
309
319
|
}
|
|
310
320
|
|
|
311
321
|
// regex: \s+(?!\S)
|
|
312
|
-
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) !=
|
|
322
|
+
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
|
|
313
323
|
pos += num_whitespaces - 1;
|
|
314
324
|
_add_token(pos);
|
|
315
325
|
continue;
|
|
@@ -344,13 +354,13 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
|
344
354
|
assert(offset_end <= cpts.size());
|
|
345
355
|
start = offset_end;
|
|
346
356
|
|
|
347
|
-
|
|
348
|
-
|
|
357
|
+
static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
|
|
358
|
+
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
|
359
|
+
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
|
|
349
360
|
};
|
|
350
361
|
|
|
351
362
|
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
|
352
|
-
|
|
353
|
-
return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : undef;
|
|
363
|
+
return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{};
|
|
354
364
|
};
|
|
355
365
|
|
|
356
366
|
size_t _prev_end = offset_ini;
|
|
@@ -371,18 +381,18 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
|
371
381
|
};
|
|
372
382
|
|
|
373
383
|
for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
|
|
374
|
-
const
|
|
384
|
+
const uint32_t cpt = _get_cpt(pos);
|
|
375
385
|
const auto flags = _get_flags(pos);
|
|
376
386
|
|
|
377
387
|
// regex: (?i:'s|'t|'re|'ve|'m|'ll|'d) // case insensitive
|
|
378
388
|
if (cpt == '\'' && pos+1 < offset_end) {
|
|
379
|
-
|
|
389
|
+
uint32_t cpt_next = unicode_tolower(_get_cpt(pos+1));
|
|
380
390
|
if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
|
|
381
391
|
pos += _add_token(pos+2);
|
|
382
392
|
continue;
|
|
383
393
|
}
|
|
384
394
|
if (pos+2 < offset_end) {
|
|
385
|
-
|
|
395
|
+
uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos+2));
|
|
386
396
|
if ((cpt_next == 'r' && cpt_next_next == 'e') ||
|
|
387
397
|
(cpt_next == 'v' && cpt_next_next == 'e') ||
|
|
388
398
|
(cpt_next == 'l' && cpt_next_next == 'l')) {
|
|
@@ -392,8 +402,8 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
|
392
402
|
}
|
|
393
403
|
}
|
|
394
404
|
|
|
395
|
-
// regex: [^\r\n\p{L}\p{N}]?\p{L}+
|
|
396
|
-
if (!(cpt == '\r' || cpt == '\n' ||
|
|
405
|
+
// regex: [^\r\n\p{L}\p{N}]?\p{L}+
|
|
406
|
+
if (!(cpt == '\r' || cpt == '\n' || flags.is_number)) {
|
|
397
407
|
if (flags.is_letter || _get_flags(pos+1).is_letter) { // one or more letters
|
|
398
408
|
pos++;
|
|
399
409
|
while (_get_flags(pos).is_letter) {
|
|
@@ -419,12 +429,12 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
|
419
429
|
|
|
420
430
|
// regex: <space>?[^\s\p{L}\p{N}]+[\r\n]*
|
|
421
431
|
auto flags2 = (cpt == ' ' ? _get_flags(pos+1) : flags);
|
|
422
|
-
if (!(flags2.is_whitespace
|
|
432
|
+
if (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags.as_uint()) {
|
|
423
433
|
pos += (cpt == ' ');
|
|
424
|
-
while (!(flags2.is_whitespace
|
|
434
|
+
while (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
|
|
425
435
|
flags2 = _get_flags(++pos);
|
|
426
436
|
}
|
|
427
|
-
|
|
437
|
+
uint32_t cpt2 = _get_cpt(pos);
|
|
428
438
|
while (cpt2 == '\r' || cpt2 == '\n') {
|
|
429
439
|
cpt2 = _get_cpt(++pos);
|
|
430
440
|
}
|
|
@@ -435,7 +445,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
|
435
445
|
size_t num_whitespaces = 0;
|
|
436
446
|
size_t last_end_r_or_n = 0;
|
|
437
447
|
while (_get_flags(pos+num_whitespaces).is_whitespace) {
|
|
438
|
-
|
|
448
|
+
uint32_t cpt2 = _get_cpt(pos+num_whitespaces);
|
|
439
449
|
if (cpt2 == '\r' || cpt2 == '\n') {
|
|
440
450
|
last_end_r_or_n = pos + num_whitespaces + 1;
|
|
441
451
|
}
|
|
@@ -450,7 +460,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
|
450
460
|
}
|
|
451
461
|
|
|
452
462
|
// regex: \s+(?!\S)
|
|
453
|
-
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) !=
|
|
463
|
+
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
|
|
454
464
|
pos += num_whitespaces - 1;
|
|
455
465
|
_add_token(pos);
|
|
456
466
|
continue;
|
|
@@ -594,6 +604,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
|
|
|
594
604
|
|
|
595
605
|
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
|
|
596
606
|
std::vector<uint32_t> result;
|
|
607
|
+
result.reserve(utf8.size());
|
|
597
608
|
size_t offset = 0;
|
|
598
609
|
while (offset < utf8.size()) {
|
|
599
610
|
result.push_back(unicode_cpt_from_utf8(utf8, offset));
|
|
@@ -626,7 +637,7 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
|
|
|
626
637
|
return map.at(utf8);
|
|
627
638
|
}
|
|
628
639
|
|
|
629
|
-
|
|
640
|
+
uint32_t unicode_tolower(uint32_t cp) {
|
|
630
641
|
auto it = unicode_map_lowercase.find(cp);
|
|
631
642
|
return it == unicode_map_lowercase.end() ? cp : it->second;
|
|
632
643
|
}
|
|
@@ -679,10 +690,14 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|
|
679
690
|
continue;
|
|
680
691
|
}
|
|
681
692
|
|
|
682
|
-
const
|
|
693
|
+
const auto flags = unicode_cpt_flags(cpts[i]);
|
|
683
694
|
|
|
684
|
-
if (
|
|
685
|
-
|
|
695
|
+
if (flags.is_whitespace) {
|
|
696
|
+
//NOTE: C++ std::regex \s does not match 0x85; Rust and Python regex do.
|
|
697
|
+
//text_collapsed[i] = (char) 0x85; // <Next Line> as whitespace fallback
|
|
698
|
+
text_collapsed[i] = (char) 0x0B; // <vertical tab> as whitespace fallback
|
|
699
|
+
} else if (k_ucat_cpt.find(flags.category_flag()) != k_ucat_cpt.end()) {
|
|
700
|
+
text_collapsed[i] = k_ucat_cpt.at(flags.category_flag());
|
|
686
701
|
} else {
|
|
687
702
|
text_collapsed[i] = (char) 0xD0; // fallback
|
|
688
703
|
}
|
|
@@ -766,9 +781,16 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|
|
766
781
|
bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets);
|
|
767
782
|
} else {
|
|
768
783
|
// no unicode category used, we can use std::wregex directly
|
|
769
|
-
const std::wstring wtext = unicode_wstring_from_utf8(text);
|
|
770
784
|
const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);
|
|
771
785
|
|
|
786
|
+
// std::wregex \s does not match non-ASCII whitespace, using 0x0B as fallback
|
|
787
|
+
std::wstring wtext(cpts.begin(), cpts.end());
|
|
788
|
+
for (size_t i = 0; i < wtext.size(); ++i) {
|
|
789
|
+
if (wtext[i] > 0x7F && unicode_cpt_flags(wtext[i]).is_whitespace) {
|
|
790
|
+
wtext[i] = 0x0B;
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
|
|
772
794
|
//printf("text: %s\n", text.c_str());
|
|
773
795
|
//printf("regex_expr: %s\n", regex_expr.c_str());
|
|
774
796
|
bpe_offsets = unicode_regex_split_stl(wtext, wregex_expr, bpe_offsets);
|
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
#include <string>
|
|
5
5
|
#include <vector>
|
|
6
6
|
|
|
7
|
+
// TODO: prefix all symbols with "llama_"
|
|
8
|
+
|
|
7
9
|
struct codepoint_flags {
|
|
8
10
|
enum {
|
|
9
11
|
UNDEFINED = 0x0001,
|
|
@@ -46,8 +48,10 @@ struct codepoint_flags {
|
|
|
46
48
|
}
|
|
47
49
|
};
|
|
48
50
|
|
|
51
|
+
size_t unicode_len_utf8(char src);
|
|
49
52
|
|
|
50
53
|
std::string unicode_cpt_to_utf8(uint32_t cp);
|
|
54
|
+
uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset);
|
|
51
55
|
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
|
|
52
56
|
|
|
53
57
|
std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts);
|
|
@@ -58,6 +62,6 @@ codepoint_flags unicode_cpt_flags(const std::string & utf8);
|
|
|
58
62
|
std::string unicode_byte_to_utf8(uint8_t byte);
|
|
59
63
|
uint8_t unicode_utf8_to_byte(const std::string & utf8);
|
|
60
64
|
|
|
61
|
-
|
|
65
|
+
uint32_t unicode_tolower(uint32_t cp);
|
|
62
66
|
|
|
63
67
|
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
|
|
@@ -70,21 +70,19 @@ add_executable(test-tokenizer-0 test-tokenizer-0.cpp)
|
|
|
70
70
|
target_link_libraries(test-tokenizer-0 PRIVATE common)
|
|
71
71
|
install(TARGETS test-tokenizer-0 RUNTIME)
|
|
72
72
|
|
|
73
|
-
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
|
|
74
|
-
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
|
|
75
|
-
llama_test(test-tokenizer-0 NAME test-tokenizer-0-phi-3 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-phi-3.gguf)
|
|
76
|
-
llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
|
77
73
|
llama_test(test-tokenizer-0 NAME test-tokenizer-0-bert-bge ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bert-bge.gguf)
|
|
78
|
-
# TODO: enable when fixed
|
|
79
|
-
# https://github.com/ggerganov/llama.cpp/pull/7036
|
|
80
|
-
#llama_test(test-tokenizer-0 NAME test-tokenizer-0-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
|
|
81
|
-
#llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
|
|
82
|
-
#llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
|
|
83
|
-
llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
|
|
84
|
-
llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
|
|
85
|
-
llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
|
|
86
74
|
llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-command-r.gguf)
|
|
75
|
+
llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
|
|
76
|
+
llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
|
|
77
|
+
llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
|
78
|
+
llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
|
|
79
|
+
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
|
|
80
|
+
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
|
|
81
|
+
llama_test(test-tokenizer-0 NAME test-tokenizer-0-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
|
|
82
|
+
llama_test(test-tokenizer-0 NAME test-tokenizer-0-phi-3 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-phi-3.gguf)
|
|
87
83
|
llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-qwen2.gguf)
|
|
84
|
+
llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
|
|
85
|
+
llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
|
|
88
86
|
|
|
89
87
|
# build test-tokenizer-1-bpe target once and add many tests
|
|
90
88
|
add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
|
|
@@ -92,16 +90,14 @@ target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
|
|
|
92
90
|
install(TARGETS test-tokenizer-1-bpe RUNTIME)
|
|
93
91
|
|
|
94
92
|
# TODO: disabled due to slowness
|
|
95
|
-
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
|
|
96
|
-
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
|
97
93
|
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
|
|
98
|
-
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-
|
|
99
|
-
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-
|
|
94
|
+
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
|
95
|
+
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
|
|
100
96
|
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
|
|
97
|
+
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
|
|
98
|
+
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
|
|
101
99
|
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
|
|
102
100
|
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
|
|
103
|
-
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
|
|
104
|
-
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-bloom ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf)
|
|
105
101
|
|
|
106
102
|
# build test-tokenizer-1-spm target once and add many tests
|
|
107
103
|
add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
|
|
@@ -129,8 +125,11 @@ llama_target_and_test(test-rope.cpp)
|
|
|
129
125
|
llama_target_and_test(test-model-load-cancel.cpp LABEL "model")
|
|
130
126
|
llama_target_and_test(test-autorelease.cpp LABEL "model")
|
|
131
127
|
|
|
132
|
-
|
|
133
|
-
|
|
128
|
+
# TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
|
|
129
|
+
if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
|
|
130
|
+
llama_target_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
|
|
131
|
+
target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
|
|
132
|
+
endif()
|
|
134
133
|
|
|
135
134
|
# dummy executable - not installed
|
|
136
135
|
get_filename_component(TEST_TARGET test-c.c NAME_WE)
|