@fugood/llama.node 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +5 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +8 -1
- package/package.json +1 -1
- package/patches/llama.patch +12 -12
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/LlamaContext.cpp +33 -1
- package/src/LlamaContext.h +1 -0
- package/src/LoadSessionWorker.cpp +1 -0
- package/src/llama.cpp/.github/workflows/bench.yml +310 -0
- package/src/llama.cpp/.github/workflows/build.yml +1315 -0
- package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
- package/src/llama.cpp/.github/workflows/docker.yml +116 -0
- package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
- package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
- package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
- package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
- package/src/llama.cpp/.github/workflows/server.yml +183 -0
- package/src/llama.cpp/CMakeLists.txt +91 -1245
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
- package/src/llama.cpp/cmake/build-info.cmake +58 -0
- package/src/llama.cpp/cmake/git-vars.cmake +22 -0
- package/src/llama.cpp/common/CMakeLists.txt +4 -3
- package/src/llama.cpp/common/build-info.cpp.in +4 -0
- package/src/llama.cpp/common/common.cpp +1116 -877
- package/src/llama.cpp/common/common.h +191 -77
- package/src/llama.cpp/common/grammar-parser.cpp +118 -31
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
- package/src/llama.cpp/common/log.h +1 -1
- package/src/llama.cpp/common/ngram-cache.h +10 -3
- package/src/llama.cpp/common/sampling.cpp +19 -10
- package/src/llama.cpp/docs/build.md +353 -0
- package/src/llama.cpp/examples/CMakeLists.txt +22 -22
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +52 -55
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/chat-13B.bat +57 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
- package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
- package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
- package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +38 -153
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
- package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
- package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
- package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
- package/src/llama.cpp/examples/llava/clip.cpp +23 -14
- package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
- package/src/llama.cpp/examples/llava/requirements.txt +3 -2
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +98 -75
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
- package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
- package/src/llama.cpp/examples/server/server.cpp +274 -671
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +28 -29
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +21 -29
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +23 -0
- package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
- package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
- package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
- package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
- package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
- package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
- package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
- package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
- package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
- package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
- package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
- package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
- package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
- package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
- package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
- package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
- package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
- package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
- package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
- package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
- package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
- package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
- package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
- package/src/llama.cpp/models/.editorconfig +1 -0
- package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/requirements/requirements-all.txt +12 -0
- package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
- package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
- package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
- package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
- package/src/llama.cpp/requirements.txt +5 -4
- package/src/llama.cpp/scripts/build-info.sh +30 -0
- package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
- package/src/llama.cpp/src/CMakeLists.txt +33 -0
- package/src/llama.cpp/src/llama-grammar.cpp +539 -0
- package/src/llama.cpp/src/llama-grammar.h +39 -0
- package/src/llama.cpp/src/llama-impl.h +26 -0
- package/src/llama.cpp/src/llama-sampling.cpp +635 -0
- package/src/llama.cpp/src/llama-sampling.h +56 -0
- package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
- package/src/llama.cpp/src/llama-vocab.h +130 -0
- package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
- package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
- package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
- package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
- package/src/llama.cpp/tests/CMakeLists.txt +19 -20
- package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
- package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
- package/src/llama.cpp/tests/test-double-float.cpp +2 -2
- package/src/llama.cpp/tests/test-grad0.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
- package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
- package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
- package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
- package/src/llama.cpp/tests/test-rope.cpp +3 -4
- package/src/llama.cpp/tests/test-sampling.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
- package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
- package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
- package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
- package/src/llama.cpp/ggml-opencl.cpp +0 -2305
- package/src/llama.cpp/ggml-opencl.h +0 -36
- package/src/llama.cpp/ggml-sycl.cpp +0 -17340
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
- package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
- /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
- /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
- /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
- /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
- /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
|
@@ -16,92 +16,282 @@ static std::string join(Iterator begin, Iterator end, const std::string & separa
|
|
|
16
16
|
|
|
17
17
|
static std::string repeat(const std::string & str, size_t n);
|
|
18
18
|
|
|
19
|
-
static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = ""
|
|
19
|
+
static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
|
|
20
|
+
auto has_max = max_items != std::numeric_limits<int>::max();
|
|
21
|
+
|
|
22
|
+
if (min_items == 0 && max_items == 1) {
|
|
23
|
+
return item_rule + "?";
|
|
24
|
+
}
|
|
25
|
+
|
|
20
26
|
if (separator_rule.empty()) {
|
|
21
|
-
if (min_items ==
|
|
22
|
-
return item_rule + "?";
|
|
23
|
-
} else if (min_items == 1 && max_items == std::numeric_limits<int>::max()) {
|
|
27
|
+
if (min_items == 1 && !has_max) {
|
|
24
28
|
return item_rule + "+";
|
|
29
|
+
} else if (min_items == 0 && !has_max) {
|
|
30
|
+
return item_rule + "*";
|
|
31
|
+
} else {
|
|
32
|
+
return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
|
|
25
33
|
}
|
|
26
34
|
}
|
|
27
35
|
|
|
28
|
-
|
|
29
|
-
if (min_items
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
36
|
+
auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items);
|
|
37
|
+
if (min_items == 0) {
|
|
38
|
+
result = "(" + result + ")?";
|
|
39
|
+
}
|
|
40
|
+
return result;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */
|
|
44
|
+
class string_view {
|
|
45
|
+
const std::string & _str;
|
|
46
|
+
const size_t _start;
|
|
47
|
+
const size_t _end;
|
|
48
|
+
public:
|
|
49
|
+
string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {}
|
|
50
|
+
|
|
51
|
+
size_t size() const {
|
|
52
|
+
return _end - _start;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
size_t length() const {
|
|
56
|
+
return size();
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
operator std::string() const {
|
|
60
|
+
return str();
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
std::string str() const {
|
|
64
|
+
return _str.substr(_start, _end - _start);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
string_view substr(size_t pos, size_t len = std::string::npos) const {
|
|
68
|
+
return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
char operator[](size_t pos) const {
|
|
72
|
+
auto index = _start + pos;
|
|
73
|
+
if (index >= _end) {
|
|
74
|
+
throw std::out_of_range("string_view index out of range");
|
|
35
75
|
}
|
|
76
|
+
return _str[_start + pos];
|
|
36
77
|
}
|
|
37
78
|
|
|
38
|
-
|
|
39
|
-
|
|
79
|
+
bool operator==(const string_view & other) const {
|
|
80
|
+
std::string this_str = *this;
|
|
81
|
+
std::string other_str = other;
|
|
82
|
+
return this_str == other_str;
|
|
83
|
+
}
|
|
84
|
+
};
|
|
40
85
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
86
|
+
static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
|
|
87
|
+
auto has_min = min_value != std::numeric_limits<int>::min();
|
|
88
|
+
auto has_max = max_value != std::numeric_limits<int>::max();
|
|
89
|
+
|
|
90
|
+
auto digit_range = [&](char from, char to) {
|
|
91
|
+
out << "[";
|
|
92
|
+
if (from == to) {
|
|
93
|
+
out << from;
|
|
47
94
|
} else {
|
|
48
|
-
|
|
49
|
-
// strip trailing space
|
|
50
|
-
res = res.substr(0, res.length() - 1);
|
|
51
|
-
res += repeat(")?", up_to_n);
|
|
52
|
-
return res;
|
|
95
|
+
out << from << "-" << to;
|
|
53
96
|
}
|
|
97
|
+
out << "]";
|
|
54
98
|
};
|
|
99
|
+
auto more_digits = [&](int min_digits, int max_digits) {
|
|
100
|
+
out << "[0-9]";
|
|
101
|
+
if (min_digits == max_digits && min_digits == 1) {
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
104
|
+
out << "{";
|
|
105
|
+
out << min_digits;
|
|
106
|
+
if (max_digits != min_digits) {
|
|
107
|
+
out << ",";
|
|
108
|
+
if (max_digits != std::numeric_limits<int>::max()) {
|
|
109
|
+
out << max_digits;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
out << "}";
|
|
113
|
+
};
|
|
114
|
+
std::function<void(const string_view &, const string_view &)> uniform_range =
|
|
115
|
+
[&](const string_view & from, const string_view & to) {
|
|
116
|
+
size_t i = 0;
|
|
117
|
+
while (i < from.length() && i < to.length() && from[i] == to[i]) {
|
|
118
|
+
i++;
|
|
119
|
+
}
|
|
120
|
+
if (i > 0) {
|
|
121
|
+
out << "\"" << from.substr(0, i).str() << "\"";
|
|
122
|
+
}
|
|
123
|
+
if (i < from.length() && i < to.length()) {
|
|
124
|
+
if (i > 0) {
|
|
125
|
+
out << " ";
|
|
126
|
+
}
|
|
127
|
+
auto sub_len = from.length() - i - 1;
|
|
128
|
+
if (sub_len > 0) {
|
|
129
|
+
auto from_sub = from.substr(i + 1);
|
|
130
|
+
auto to_sub = to.substr(i + 1);
|
|
131
|
+
auto sub_zeros = repeat("0", sub_len);
|
|
132
|
+
auto sub_nines = repeat("9", sub_len);
|
|
133
|
+
|
|
134
|
+
auto to_reached = false;
|
|
135
|
+
out << "(";
|
|
136
|
+
if (from_sub == sub_zeros) {
|
|
137
|
+
digit_range(from[i], to[i] - 1);
|
|
138
|
+
out << " ";
|
|
139
|
+
more_digits(sub_len, sub_len);
|
|
140
|
+
} else {
|
|
141
|
+
out << "[" << from[i] << "] ";
|
|
142
|
+
out << "(";
|
|
143
|
+
uniform_range(from_sub, sub_nines);
|
|
144
|
+
out << ")";
|
|
145
|
+
if (from[i] < to[i] - 1) {
|
|
146
|
+
out << " | ";
|
|
147
|
+
if (to_sub == sub_nines) {
|
|
148
|
+
digit_range(from[i] + 1, to[i]);
|
|
149
|
+
to_reached = true;
|
|
150
|
+
} else {
|
|
151
|
+
digit_range(from[i] + 1, to[i] - 1);
|
|
152
|
+
}
|
|
153
|
+
out << " ";
|
|
154
|
+
more_digits(sub_len, sub_len);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
if (!to_reached) {
|
|
158
|
+
out << " | ";
|
|
159
|
+
digit_range(to[i], to[i]);
|
|
160
|
+
out << " ";
|
|
161
|
+
uniform_range(sub_zeros, to_sub);
|
|
162
|
+
}
|
|
163
|
+
out << ")";
|
|
164
|
+
} else {
|
|
165
|
+
out << "[" << from[i] << "-" << to[i] << "]";
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
};
|
|
169
|
+
|
|
170
|
+
if (has_min && has_max) {
|
|
171
|
+
if (min_value < 0 && max_value < 0) {
|
|
172
|
+
out << "\"-\" (";
|
|
173
|
+
_build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
|
|
174
|
+
out << ")";
|
|
175
|
+
return;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
if (min_value < 0) {
|
|
179
|
+
out << "\"-\" (";
|
|
180
|
+
_build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
|
|
181
|
+
out << ") | ";
|
|
182
|
+
min_value = 0;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
auto min_s = std::to_string(min_value);
|
|
186
|
+
auto max_s = std::to_string(max_value);
|
|
187
|
+
auto min_digits = min_s.length();
|
|
188
|
+
auto max_digits = max_s.length();
|
|
55
189
|
|
|
56
|
-
|
|
57
|
-
|
|
190
|
+
for (auto digits = min_digits; digits < max_digits; digits++) {
|
|
191
|
+
uniform_range(min_s, repeat("9", digits));
|
|
192
|
+
min_s = "1" + repeat("0", digits);
|
|
193
|
+
out << " | ";
|
|
194
|
+
}
|
|
195
|
+
uniform_range(min_s, max_s);
|
|
196
|
+
return;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
auto less_decimals = std::max(decimals_left - 1, 1);
|
|
200
|
+
|
|
201
|
+
if (has_min) {
|
|
202
|
+
if (min_value < 0) {
|
|
203
|
+
out << "\"-\" (";
|
|
204
|
+
_build_min_max_int(std::numeric_limits<int>::min(), -min_value, out, decimals_left, /* top_level= */ false);
|
|
205
|
+
out << ") | [0] | [1-9] ";
|
|
206
|
+
more_digits(0, decimals_left - 1);
|
|
207
|
+
} else if (min_value == 0) {
|
|
208
|
+
if (top_level) {
|
|
209
|
+
out << "[0] | [1-9] ";
|
|
210
|
+
more_digits(0, less_decimals);
|
|
211
|
+
} else {
|
|
212
|
+
more_digits(1, decimals_left);
|
|
213
|
+
}
|
|
214
|
+
} else if (min_value <= 9) {
|
|
215
|
+
char c = '0' + min_value;
|
|
216
|
+
auto range_start = top_level ? '1' : '0';
|
|
217
|
+
if (c > range_start) {
|
|
218
|
+
digit_range(range_start, c - 1);
|
|
219
|
+
out << " ";
|
|
220
|
+
more_digits(1, less_decimals);
|
|
221
|
+
out << " | ";
|
|
222
|
+
}
|
|
223
|
+
digit_range(c, '9');
|
|
224
|
+
out << " ";
|
|
225
|
+
more_digits(0, less_decimals);
|
|
226
|
+
} else {
|
|
227
|
+
auto min_s = std::to_string(min_value);
|
|
228
|
+
auto len = min_s.length();
|
|
229
|
+
auto c = min_s[0];
|
|
230
|
+
|
|
231
|
+
if (c > '1') {
|
|
232
|
+
digit_range(top_level ? '1' : '0', c - 1);
|
|
233
|
+
out << " ";
|
|
234
|
+
more_digits(len, less_decimals);
|
|
235
|
+
out << " | ";
|
|
236
|
+
}
|
|
237
|
+
digit_range(c, c);
|
|
238
|
+
out << " (";
|
|
239
|
+
_build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits<int>::max(), out, less_decimals, /* top_level= */ false);
|
|
240
|
+
out << ")";
|
|
241
|
+
if (c < '9') {
|
|
242
|
+
out << " | ";
|
|
243
|
+
digit_range(c + 1, '9');
|
|
244
|
+
out << " ";
|
|
245
|
+
more_digits(len - 1, less_decimals);
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
return;
|
|
58
249
|
}
|
|
59
250
|
|
|
60
|
-
if (
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
251
|
+
if (has_max) {
|
|
252
|
+
if (max_value >= 0) {
|
|
253
|
+
if (top_level) {
|
|
254
|
+
out << "\"-\" [1-9] ";
|
|
255
|
+
more_digits(0, less_decimals);
|
|
256
|
+
out << " | ";
|
|
257
|
+
}
|
|
258
|
+
_build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
|
|
66
259
|
} else {
|
|
67
|
-
|
|
260
|
+
out << "\"-\" (";
|
|
261
|
+
_build_min_max_int(-max_value, std::numeric_limits<int>::max(), out, decimals_left, /* top_level= */ false);
|
|
262
|
+
out << ")";
|
|
68
263
|
}
|
|
264
|
+
return;
|
|
69
265
|
}
|
|
70
266
|
|
|
71
|
-
|
|
267
|
+
throw std::runtime_error("At least one of min_value or max_value must be set");
|
|
72
268
|
}
|
|
73
269
|
|
|
74
|
-
const std::string SPACE_RULE = "\" \"
|
|
270
|
+
const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}";
|
|
75
271
|
|
|
76
272
|
struct BuiltinRule {
|
|
77
273
|
std::string content;
|
|
78
274
|
std::vector<std::string> deps;
|
|
79
275
|
};
|
|
80
276
|
|
|
81
|
-
const std::string _up_to_15_digits = build_repetition("[0-9]", 0, 15);
|
|
82
|
-
|
|
83
277
|
std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
|
|
84
278
|
{"boolean", {"(\"true\" | \"false\") space", {}}},
|
|
85
|
-
{"decimal-part", {"[0-9]
|
|
86
|
-
{"integral-part", {"[0
|
|
279
|
+
{"decimal-part", {"[0-9]{1,16}", {}}},
|
|
280
|
+
{"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}},
|
|
87
281
|
{"number", {"(\"-\"? integral-part) (\".\" decimal-part)? ([eE] [-+]? integral-part)? space", {"integral-part", "decimal-part"}}},
|
|
88
282
|
{"integer", {"(\"-\"? integral-part) space", {"integral-part"}}},
|
|
89
283
|
{"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}},
|
|
90
284
|
{"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}},
|
|
91
285
|
{"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}},
|
|
92
|
-
{"uuid", {"\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]
|
|
93
|
-
|
|
94
|
-
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
|
95
|
-
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
|
96
|
-
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space", {}}},
|
|
97
|
-
{"char", {"[^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])", {}}},
|
|
286
|
+
{"uuid", {"\"\\\"\" [0-9a-fA-F]{8} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{12} \"\\\"\" space", {}}},
|
|
287
|
+
{"char", {"[^\"\\\\\\x7F\\x00-\\x1F] | [\\\\] ([\"\\\\bfnrt] | \"u\" [0-9a-fA-F]{4})", {}}},
|
|
98
288
|
{"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}},
|
|
99
289
|
{"null", {"\"null\" space", {}}},
|
|
100
290
|
};
|
|
101
291
|
|
|
102
292
|
std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
|
|
103
|
-
{"date", {"[0-9]
|
|
104
|
-
{"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]
|
|
293
|
+
{"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
|
|
294
|
+
{"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
|
|
105
295
|
{"date-time", {"date \"T\" time", {"date", "time"}}},
|
|
106
296
|
{"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}},
|
|
107
297
|
{"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}},
|
|
@@ -126,7 +316,7 @@ std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
|
|
|
126
316
|
};
|
|
127
317
|
|
|
128
318
|
std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
|
|
129
|
-
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
|
|
319
|
+
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
|
|
130
320
|
|
|
131
321
|
template <typename Iterator>
|
|
132
322
|
std::string join(Iterator begin, Iterator end, const std::string & separator) {
|
|
@@ -197,7 +387,6 @@ static std::string format_literal(const std::string & literal) {
|
|
|
197
387
|
return "\"" + escaped + "\"";
|
|
198
388
|
}
|
|
199
389
|
|
|
200
|
-
|
|
201
390
|
class SchemaConverter {
|
|
202
391
|
private:
|
|
203
392
|
std::function<json(const std::string &)> _fetch_json;
|
|
@@ -385,8 +574,7 @@ private:
|
|
|
385
574
|
sub_is_literal ? "\"" + sub + "\"" : sub,
|
|
386
575
|
min_times,
|
|
387
576
|
max_times,
|
|
388
|
-
""
|
|
389
|
-
sub_is_literal
|
|
577
|
+
""
|
|
390
578
|
);
|
|
391
579
|
seq.back().second = false;
|
|
392
580
|
} else {
|
|
@@ -426,6 +614,75 @@ private:
|
|
|
426
614
|
return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
|
|
427
615
|
}
|
|
428
616
|
|
|
617
|
+
/*
|
|
618
|
+
Returns a rule that matches a JSON string that is none of the provided strings
|
|
619
|
+
|
|
620
|
+
not_strings({"a"})
|
|
621
|
+
-> ["] ( [a] char+ | [^"a] char* )? ["] space
|
|
622
|
+
not_strings({"and", "also"})
|
|
623
|
+
-> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space
|
|
624
|
+
*/
|
|
625
|
+
std::string _not_strings(const std::vector<std::string> & strings) {
|
|
626
|
+
|
|
627
|
+
struct TrieNode {
|
|
628
|
+
std::map<char, TrieNode> children;
|
|
629
|
+
bool is_end_of_string;
|
|
630
|
+
|
|
631
|
+
TrieNode() : is_end_of_string(false) {}
|
|
632
|
+
|
|
633
|
+
void insert(const std::string & string) {
|
|
634
|
+
auto node = this;
|
|
635
|
+
for (char c : string) {
|
|
636
|
+
node = &node->children[c];
|
|
637
|
+
}
|
|
638
|
+
node->is_end_of_string = true;
|
|
639
|
+
}
|
|
640
|
+
};
|
|
641
|
+
|
|
642
|
+
TrieNode trie;
|
|
643
|
+
for (const auto & s : strings) {
|
|
644
|
+
trie.insert(s);
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
|
|
648
|
+
std::ostringstream out;
|
|
649
|
+
out << "[\"] ( ";
|
|
650
|
+
std::function<void(const TrieNode &)> visit = [&](const TrieNode & node) {
|
|
651
|
+
std::ostringstream rejects;
|
|
652
|
+
auto first = true;
|
|
653
|
+
for (const auto & kv : node.children) {
|
|
654
|
+
rejects << kv.first;
|
|
655
|
+
if (first) {
|
|
656
|
+
first = false;
|
|
657
|
+
} else {
|
|
658
|
+
out << " | ";
|
|
659
|
+
}
|
|
660
|
+
out << "[" << kv.first << "]";
|
|
661
|
+
if (!kv.second.children.empty()) {
|
|
662
|
+
out << " (";
|
|
663
|
+
visit(kv.second);
|
|
664
|
+
out << ")";
|
|
665
|
+
} else if (kv.second.is_end_of_string) {
|
|
666
|
+
out << " " << char_rule << "+";
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
if (!node.children.empty()) {
|
|
670
|
+
if (!first) {
|
|
671
|
+
out << " | ";
|
|
672
|
+
}
|
|
673
|
+
out << "[^\"" << rejects.str() << "] " << char_rule << "*";
|
|
674
|
+
}
|
|
675
|
+
};
|
|
676
|
+
visit(trie);
|
|
677
|
+
|
|
678
|
+
out << " )";
|
|
679
|
+
if (!trie.is_end_of_string) {
|
|
680
|
+
out << "?";
|
|
681
|
+
}
|
|
682
|
+
out << " [\"] space";
|
|
683
|
+
return out.str();
|
|
684
|
+
}
|
|
685
|
+
|
|
429
686
|
std::string _resolve_ref(const std::string & ref) {
|
|
430
687
|
std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
|
|
431
688
|
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
|
|
@@ -446,6 +703,7 @@ private:
|
|
|
446
703
|
std::vector<std::string> required_props;
|
|
447
704
|
std::vector<std::string> optional_props;
|
|
448
705
|
std::unordered_map<std::string, std::string> prop_kv_rule_names;
|
|
706
|
+
std::vector<std::string> prop_names;
|
|
449
707
|
for (const auto & kv : properties) {
|
|
450
708
|
const auto &prop_name = kv.first;
|
|
451
709
|
const auto &prop_schema = kv.second;
|
|
@@ -460,11 +718,18 @@ private:
|
|
|
460
718
|
} else {
|
|
461
719
|
optional_props.push_back(prop_name);
|
|
462
720
|
}
|
|
721
|
+
prop_names.push_back(prop_name);
|
|
463
722
|
}
|
|
464
|
-
if (
|
|
723
|
+
if ((additional_properties.is_boolean() && additional_properties.get<bool>()) || additional_properties.is_object()) {
|
|
465
724
|
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
|
|
466
|
-
std::string value_rule =
|
|
467
|
-
|
|
725
|
+
std::string value_rule =
|
|
726
|
+
additional_properties.is_object() ? visit(additional_properties, sub_name + "-value")
|
|
727
|
+
: _add_primitive("value", PRIMITIVE_RULES.at("value"));
|
|
728
|
+
|
|
729
|
+
auto key_rule =
|
|
730
|
+
prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string"))
|
|
731
|
+
: _add_rule(sub_name + "-k", _not_strings(prop_names));
|
|
732
|
+
std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule);
|
|
468
733
|
prop_kv_rule_names["*"] = kv_rule;
|
|
469
734
|
optional_props.push_back("*");
|
|
470
735
|
}
|
|
@@ -490,15 +755,11 @@ private:
|
|
|
490
755
|
}
|
|
491
756
|
std::string k = ks[0];
|
|
492
757
|
std::string kv_rule_name = prop_kv_rule_names[k];
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
kv_rule_name + " ( \",\" space " + kv_rule_name + " )*"
|
|
497
|
-
);
|
|
498
|
-
} else if (first_is_optional) {
|
|
499
|
-
res = "( \",\" space " + kv_rule_name + " )?";
|
|
758
|
+
std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
|
|
759
|
+
if (first_is_optional) {
|
|
760
|
+
res = comma_ref + (k == "*" ? "*" : "?");
|
|
500
761
|
} else {
|
|
501
|
-
res = kv_rule_name;
|
|
762
|
+
res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : "");
|
|
502
763
|
}
|
|
503
764
|
if (ks.size() > 1) {
|
|
504
765
|
res += " " + _add_rule(
|
|
@@ -632,17 +893,19 @@ public:
|
|
|
632
893
|
} else if (schema_type.is_array()) {
|
|
633
894
|
std::vector<json> schema_types;
|
|
634
895
|
for (const auto & t : schema_type) {
|
|
635
|
-
|
|
896
|
+
json schema_copy(schema);
|
|
897
|
+
schema_copy["type"] = t;
|
|
898
|
+
schema_types.push_back(schema_copy);
|
|
636
899
|
}
|
|
637
900
|
return _add_rule(rule_name, _generate_union_rule(name, schema_types));
|
|
638
901
|
} else if (schema.contains("const")) {
|
|
639
|
-
return _add_rule(rule_name, _generate_constant_rule(schema["const"]));
|
|
902
|
+
return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
|
|
640
903
|
} else if (schema.contains("enum")) {
|
|
641
904
|
std::vector<std::string> enum_values;
|
|
642
905
|
for (const auto & v : schema["enum"]) {
|
|
643
906
|
enum_values.push_back(_generate_constant_rule(v));
|
|
644
907
|
}
|
|
645
|
-
return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | "));
|
|
908
|
+
return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space");
|
|
646
909
|
} else if ((schema_type.is_null() || schema_type == "object")
|
|
647
910
|
&& (schema.contains("properties") ||
|
|
648
911
|
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
|
|
@@ -724,6 +987,24 @@ public:
|
|
|
724
987
|
int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
|
|
725
988
|
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
|
|
726
989
|
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
|
|
990
|
+
} else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
|
|
991
|
+
int min_value = std::numeric_limits<int>::min();
|
|
992
|
+
int max_value = std::numeric_limits<int>::max();
|
|
993
|
+
if (schema.contains("minimum")) {
|
|
994
|
+
min_value = schema["minimum"].get<int>();
|
|
995
|
+
} else if (schema.contains("exclusiveMinimum")) {
|
|
996
|
+
min_value = schema["exclusiveMinimum"].get<int>() + 1;
|
|
997
|
+
}
|
|
998
|
+
if (schema.contains("maximum")) {
|
|
999
|
+
max_value = schema["maximum"].get<int>();
|
|
1000
|
+
} else if (schema.contains("exclusiveMaximum")) {
|
|
1001
|
+
max_value = schema["exclusiveMaximum"].get<int>() - 1;
|
|
1002
|
+
}
|
|
1003
|
+
std::stringstream out;
|
|
1004
|
+
out << "(";
|
|
1005
|
+
_build_min_max_int(min_value, max_value, out);
|
|
1006
|
+
out << ") space";
|
|
1007
|
+
return _add_rule(rule_name, out.str());
|
|
727
1008
|
} else if (schema.empty() || schema_type == "object") {
|
|
728
1009
|
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
|
729
1010
|
} else {
|
|
@@ -37,11 +37,18 @@ struct llama_ngram {
|
|
|
37
37
|
}
|
|
38
38
|
};
|
|
39
39
|
|
|
40
|
+
struct llama_token_hash_function {
|
|
41
|
+
size_t operator()(const llama_token token) const {
|
|
42
|
+
// see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
|
|
43
|
+
return token * 11400714819323198485llu;
|
|
44
|
+
}
|
|
45
|
+
};
|
|
46
|
+
|
|
40
47
|
struct llama_ngram_hash_function {
|
|
41
48
|
size_t operator()(const llama_ngram & ngram) const {
|
|
42
|
-
size_t hash = 0;
|
|
43
|
-
for (int i =
|
|
44
|
-
hash ^=
|
|
49
|
+
size_t hash = llama_token_hash_function{}(ngram.tokens[0]);
|
|
50
|
+
for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
|
|
51
|
+
hash ^= llama_token_hash_function{}(ngram.tokens[i]);
|
|
45
52
|
}
|
|
46
53
|
return hash;
|
|
47
54
|
}
|
|
@@ -28,9 +28,13 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_
|
|
|
28
28
|
|
|
29
29
|
std::vector<const llama_grammar_element *> grammar_rules(result->parsed_grammar.c_rules());
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
struct llama_grammar * grammar = llama_grammar_init(
|
|
32
32
|
grammar_rules.data(),
|
|
33
33
|
grammar_rules.size(), result->parsed_grammar.symbol_ids.at("root"));
|
|
34
|
+
if (grammar == nullptr) {
|
|
35
|
+
throw std::runtime_error("Failed to initialize llama_grammar");
|
|
36
|
+
}
|
|
37
|
+
result->grammar = grammar;
|
|
34
38
|
}
|
|
35
39
|
|
|
36
40
|
result->prev.resize(params.n_prev);
|
|
@@ -59,9 +63,13 @@ void llama_sampling_reset(llama_sampling_context * ctx) {
|
|
|
59
63
|
if (!ctx->parsed_grammar.rules.empty()) {
|
|
60
64
|
std::vector<const llama_grammar_element *> grammar_rules(ctx->parsed_grammar.c_rules());
|
|
61
65
|
|
|
62
|
-
|
|
66
|
+
struct llama_grammar * grammar = llama_grammar_init(
|
|
63
67
|
grammar_rules.data(),
|
|
64
68
|
grammar_rules.size(), ctx->parsed_grammar.symbol_ids.at("root"));
|
|
69
|
+
if (grammar == nullptr) {
|
|
70
|
+
throw std::runtime_error("Failed to initialize llama_grammar");
|
|
71
|
+
}
|
|
72
|
+
ctx->grammar = grammar;
|
|
65
73
|
}
|
|
66
74
|
|
|
67
75
|
std::fill(ctx->prev.begin(), ctx->prev.end(), 0);
|
|
@@ -274,8 +282,6 @@ static llama_token llama_sampling_sample_impl(
|
|
|
274
282
|
GGML_ASSERT(!original_logits.empty());
|
|
275
283
|
}
|
|
276
284
|
llama_token id = 0;
|
|
277
|
-
// Get a pointer to the logits
|
|
278
|
-
float * logits = llama_get_logits_ith(ctx_main, idx);
|
|
279
285
|
|
|
280
286
|
if (temp < 0.0) {
|
|
281
287
|
// greedy sampling, with probs
|
|
@@ -316,12 +322,15 @@ static llama_token llama_sampling_sample_impl(
|
|
|
316
322
|
}
|
|
317
323
|
|
|
318
324
|
if (ctx_sampling->grammar != NULL && !is_resampling) {
|
|
325
|
+
// Get a pointer to the logits
|
|
326
|
+
float * logits = llama_get_logits_ith(ctx_main, idx);
|
|
327
|
+
|
|
319
328
|
// Create an array with a single token data element for the sampled id
|
|
320
329
|
llama_token_data single_token_data = {id, logits[id], 0.0f};
|
|
321
330
|
llama_token_data_array single_token_data_array = { &single_token_data, 1, false };
|
|
322
331
|
|
|
323
332
|
// Apply grammar constraints to the single token
|
|
324
|
-
|
|
333
|
+
llama_grammar_sample(ctx_sampling->grammar, ctx_main, &single_token_data_array);
|
|
325
334
|
|
|
326
335
|
// Check if the token is valid according to the grammar by seeing if its logit has been set to -INFINITY
|
|
327
336
|
bool is_valid = single_token_data_array.data[0].logit != -INFINITY;
|
|
@@ -369,7 +378,7 @@ static llama_token_data_array llama_sampling_prepare_impl(
|
|
|
369
378
|
if (ctx_sampling->grammar != NULL && !apply_grammar) {
|
|
370
379
|
GGML_ASSERT(original_logits != NULL);
|
|
371
380
|
// Only make a copy of the original logits if we are not applying grammar checks, not sure if I actually have to do this.
|
|
372
|
-
*original_logits = {logits, logits +
|
|
381
|
+
*original_logits = {logits, logits + n_vocab};
|
|
373
382
|
}
|
|
374
383
|
|
|
375
384
|
// apply params.logit_bias map
|
|
@@ -382,10 +391,10 @@ static llama_token_data_array llama_sampling_prepare_impl(
|
|
|
382
391
|
llama_sample_apply_guidance(ctx_main, logits, logits_guidance, params.cfg_scale);
|
|
383
392
|
}
|
|
384
393
|
|
|
385
|
-
cur.
|
|
394
|
+
cur.resize(n_vocab);
|
|
386
395
|
|
|
387
396
|
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
|
388
|
-
cur
|
|
397
|
+
cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
|
|
389
398
|
}
|
|
390
399
|
|
|
391
400
|
llama_token_data_array cur_p = { cur.data(), cur.size(), false };
|
|
@@ -412,7 +421,7 @@ static llama_token_data_array llama_sampling_prepare_impl(
|
|
|
412
421
|
|
|
413
422
|
// apply grammar checks before sampling logic
|
|
414
423
|
if (apply_grammar && ctx_sampling->grammar != NULL) {
|
|
415
|
-
|
|
424
|
+
llama_grammar_sample(ctx_sampling->grammar, ctx_main, &cur_p);
|
|
416
425
|
}
|
|
417
426
|
|
|
418
427
|
return cur_p;
|
|
@@ -446,6 +455,6 @@ void llama_sampling_accept(
|
|
|
446
455
|
ctx_sampling->prev.push_back(id);
|
|
447
456
|
|
|
448
457
|
if (ctx_sampling->grammar != NULL && apply_grammar) {
|
|
449
|
-
llama_grammar_accept_token(
|
|
458
|
+
llama_grammar_accept_token(ctx_sampling->grammar, ctx_main, id);
|
|
450
459
|
}
|
|
451
460
|
}
|