llama-cpp-python 0.2.37__tar.gz → 0.2.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_cpp_python-0.2.39/.git/FETCH_HEAD +1 -0
- llama_cpp_python-0.2.39/.git/HEAD +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/config +1 -1
- llama_cpp_python-0.2.39/.git/index +0 -0
- llama_cpp_python-0.2.39/.git/logs/HEAD +1 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/FETCH_HEAD +1 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/HEAD +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/config +1 -1
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/logs/HEAD +2 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/logs/refs/heads/master +1 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +1 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/objects/95/161b3f4b3d6c1c35e9f423a1a933b0735bec4f +0 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/objects/b0/8f22c882a1443e6b97081f3ce718a4d1a741f8 +0 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/objects/dd/fe289fed86e1d59a21ea2d6f625ff44620eec5 +0 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/objects/pack/pack-d5af55203ef291c34ac81ee488254e42e1f134d3.idx +0 -0
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/objects/pack/pack-2dab3d9b62e8dec5ea0d7cf3608572059e30b9db.pack → llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/objects/pack/pack-d5af55203ef291c34ac81ee488254e42e1f134d3.pack +0 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/objects/pack/pack-d5af55203ef291c34ac81ee488254e42e1f134d3.rev +0 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/packed-refs +2 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/refs/heads/master +1 -0
- llama_cpp_python-0.2.39/.git/modules/vendor/llama.cpp/shallow +2 -0
- llama_cpp_python-0.2.39/.git/objects/pack/pack-b1310bf828284f1e9291e80af9059046b7d97b6b.idx +0 -0
- llama_cpp_python-0.2.39/.git/objects/pack/pack-b1310bf828284f1e9291e80af9059046b7d97b6b.pack +0 -0
- llama_cpp_python-0.2.39/.git/objects/pack/pack-b1310bf828284f1e9291e80af9059046b7d97b6b.rev +0 -0
- llama_cpp_python-0.2.39/.git/refs/tags/v0.2.39 +1 -0
- llama_cpp_python-0.2.39/.git/shallow +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/CHANGELOG.md +11 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/PKG-INFO +19 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/README.md +18 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/high_level_api/fastapi_server.py +1 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/__init__.py +1 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/_internals.py +20 -31
- llama_cpp_python-0.2.39/llama_cpp/_logger.py +37 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/llama.py +95 -93
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/llama_chat_format.py +9 -8
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/llama_cpp.py +36 -9
- llama_cpp_python-0.2.39/llama_cpp/llama_speculative.py +64 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/server/model.py +9 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/server/settings.py +9 -0
- llama_cpp_python-0.2.39/tests/test_llama_speculative.py +16 -0
- llama_cpp_python-0.2.39/vendor/llama.cpp/.devops/main-intel.Dockerfile +28 -0
- llama_cpp_python-0.2.39/vendor/llama.cpp/.devops/main-vulkan.Dockerfile +29 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/nix/package.nix +17 -4
- llama_cpp_python-0.2.39/vendor/llama.cpp/.devops/server-intel.Dockerfile +28 -0
- llama_cpp_python-0.2.39/vendor/llama.cpp/.devops/server-vulkan.Dockerfile +29 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/build.yml +29 -2
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/editorconfig.yml +6 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.gitignore +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/CMakeLists.txt +30 -10
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/Makefile +142 -54
- llama_cpp_python-0.2.39/vendor/llama.cpp/README-sycl.md +496 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/README.md +101 -35
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/build-info.cpp +1 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/common.cpp +48 -32
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/common.h +33 -34
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/train.cpp +6 -6
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/convert-hf-to-gguf.py +178 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/convert.py +7 -3
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched-bench/batched-bench.cpp +1 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/imatrix/imatrix.cpp +112 -4
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama-bench/README.md +21 -13
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama-bench/llama-bench.cpp +85 -30
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/MobileVLM-README.md +56 -2
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/main/main.cpp +2 -2
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/quantize/quantize.cpp +2 -2
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/README.md +19 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/chat.sh +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/server.cpp +136 -97
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/sycl/ls-sycl-device.cpp +6 -4
- llama_cpp_python-0.2.39/vendor/llama.cpp/examples/sycl/win-build-sycl.bat +23 -0
- llama_cpp_python-0.2.39/vendor/llama.cpp/examples/sycl/win-run-llama2.bat +13 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/flake.lock +9 -9
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/flake.nix +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-cuda.cu +293 -162
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-impl.h +2 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-metal.m +10 -3
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-metal.metal +29 -4
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-quants.c +39 -94
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-quants.h +68 -59
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-sycl.cpp +215 -120
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-sycl.h +7 -5
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-vulkan-shaders.hpp +1433 -11139
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-vulkan.cpp +269 -182
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml.c +123 -21
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml.h +3 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml_vk_generate_shaders.py +92 -128
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/gguf/constants.py +18 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/gguf/gguf_writer.py +3 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/gguf/tensor_mapping.py +12 -2
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/llama.cpp +384 -162
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/llama.h +11 -20
- llama_cpp_python-0.2.39/vendor/llama.cpp/scripts/install-oneapi.bat +19 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/server-llm.sh +75 -43
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-backend-ops.cpp +69 -5
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-llama-grammar.cpp +1 -1
- llama_cpp_python-0.2.37/.git/FETCH_HEAD +0 -1
- llama_cpp_python-0.2.37/.git/HEAD +0 -1
- llama_cpp_python-0.2.37/.git/index +0 -0
- llama_cpp_python-0.2.37/.git/logs/HEAD +0 -1
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/HEAD +0 -1
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/logs/HEAD +0 -2
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/logs/refs/heads/master +0 -1
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +0 -1
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/objects/pack/pack-2dab3d9b62e8dec5ea0d7cf3608572059e30b9db.idx +0 -0
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/objects/pack/pack-2dab3d9b62e8dec5ea0d7cf3608572059e30b9db.rev +0 -0
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/packed-refs +0 -2
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/refs/heads/master +0 -1
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/shallow +0 -1
- llama_cpp_python-0.2.37/.git/objects/02/c09afb0bf5559d3fe64ce67f4ff82af32ff50f +0 -0
- llama_cpp_python-0.2.37/.git/objects/03/667ba9b14656308c89e62f61377b295604a99d +0 -0
- llama_cpp_python-0.2.37/.git/objects/04/5856ea2ffe697ec33db1c1c989bd45cde5bb3d +0 -0
- llama_cpp_python-0.2.37/.git/objects/04/7bc14424303575f73af90611fec827334f54e8 +0 -0
- llama_cpp_python-0.2.37/.git/objects/0a/77bbdaa388e239d44a9f001f811b1e31d9ad7a +0 -0
- llama_cpp_python-0.2.37/.git/objects/0d/870969f4b23bd92a09ec29134d3fb454d38bec +0 -0
- llama_cpp_python-0.2.37/.git/objects/0e/2a907c0e40de1a432ee0cd2e3d01c634df1a37 +0 -0
- llama_cpp_python-0.2.37/.git/objects/11/2a0f7872fa9244bf38729a2722dc5c08dec20c +0 -0
- llama_cpp_python-0.2.37/.git/objects/11/b6d5c26e3f7157658952b8ec353e985d522fac +0 -1
- llama_cpp_python-0.2.37/.git/objects/13/c5b6b0df5f67e80cbe584909b83777901265a1 +0 -0
- llama_cpp_python-0.2.37/.git/objects/19/9bd4ffbf88c68c98b52c05f388dfa92716f6b7 +0 -0
- llama_cpp_python-0.2.37/.git/objects/1a/5152530cfbde487c928b60269a29fa5219f617 +0 -0
- llama_cpp_python-0.2.37/.git/objects/21/68579c83b3b92db0ba1839b37ee186323abbef +0 -0
- llama_cpp_python-0.2.37/.git/objects/23/e37d4d40e5ec0bfd85b5e928834d58e2cf0da6 +0 -0
- llama_cpp_python-0.2.37/.git/objects/24/04228325d88c59b819acaedb20b15635f75df9 +0 -0
- llama_cpp_python-0.2.37/.git/objects/27/2e4767b47397d1529e6a2d01298144d9d746ba +0 -0
- llama_cpp_python-0.2.37/.git/objects/27/a6b1e5042318f85dc99c9f24f4dd6fc472d242 +0 -0
- llama_cpp_python-0.2.37/.git/objects/29/03e0146d304bcacbfabfe71f171a2edc03043e +0 -0
- llama_cpp_python-0.2.37/.git/objects/2a/721872078dd3a83317627fa5a8e3ca6c6cb3f3 +0 -0
- llama_cpp_python-0.2.37/.git/objects/2c/c6fb02dcb3226c1ecf0fbed6453eb3acadeaa2 +0 -0
- llama_cpp_python-0.2.37/.git/objects/36/8022c457f6dd5bb566f7e2a21d88850bd98768 +0 -0
- llama_cpp_python-0.2.37/.git/objects/39/697bfc2538bd4558018d0d721d6d6028c4bb56 +0 -2
- llama_cpp_python-0.2.37/.git/objects/3a/1d7180d508818fe957923e00dcd8950938632d +0 -5
- llama_cpp_python-0.2.37/.git/objects/3a/6457dcdfd47e764654bacae0ba8347976b645a +0 -0
- llama_cpp_python-0.2.37/.git/objects/3d/d00767671c5e9dac5a2ab8f4f1331531294b60 +0 -0
- llama_cpp_python-0.2.37/.git/objects/41/3097201ac0fc27ef4dcf518b699a6fb610a54c +0 -0
- llama_cpp_python-0.2.37/.git/objects/41/cc68ea2402cf682807649d841e7c0f4175db01 +0 -0
- llama_cpp_python-0.2.37/.git/objects/43/5af43a845bd86d9bc7ed55f735a7e100ad566b +0 -0
- llama_cpp_python-0.2.37/.git/objects/47/4503fdfc554d8caabee4f321a80427f8c7d696 +0 -0
- llama_cpp_python-0.2.37/.git/objects/4a/106470b0b650cd76f2f5d00d744b615c72bed3 +0 -0
- llama_cpp_python-0.2.37/.git/objects/4b/3189dd1a54be3bc416786ddf184dd047dc4b20 +0 -0
- llama_cpp_python-0.2.37/.git/objects/4b/c4a6c9741137cb9351feebdd77ae023beae9ec +0 -0
- llama_cpp_python-0.2.37/.git/objects/4c/e899ccd257ed95da4f07179bfff30024aeefb8 +0 -0
- llama_cpp_python-0.2.37/.git/objects/51/f357200f8b998031f4be924e11ed2ae4bf3fea +0 -0
- llama_cpp_python-0.2.37/.git/objects/56/2410fe1a4c4093d62e1705933eaf66c6d99a2c +0 -2
- llama_cpp_python-0.2.37/.git/objects/5f/350ffe99abe7297b2839f36fd0eaeb98887805 +0 -0
- llama_cpp_python-0.2.37/.git/objects/60/bc7aef42aac0409cfdca666ad2ff6f516d7b5b +0 -0
- llama_cpp_python-0.2.37/.git/objects/61/027ef99725c50b0891fdbf0bf263a33abe648f +0 -0
- llama_cpp_python-0.2.37/.git/objects/65/1cd4ccf800ecca4ed5d0dab493b41f97d0d117 +0 -0
- llama_cpp_python-0.2.37/.git/objects/68/7316b327ca038d26a338b3070a7d4698322515 +0 -0
- llama_cpp_python-0.2.37/.git/objects/74/7c6130e3cb1479d20e2013b1dd3db3379c2266 +0 -1
- llama_cpp_python-0.2.37/.git/objects/76/e26fbd106895fba52861f8ac1e11cc6ee2a307 +0 -0
- llama_cpp_python-0.2.37/.git/objects/79/5dad726ae91f330c56e49bc188080b5b3b5ba9 +0 -0
- llama_cpp_python-0.2.37/.git/objects/79/a9e67a1aee09c6d182f240ba5eef32feabcbce +0 -3
- llama_cpp_python-0.2.37/.git/objects/7a/7b899ec7dd48d192cba14ac9c8e9df4233f7fb +0 -0
- llama_cpp_python-0.2.37/.git/objects/7d/5498f9d2c49c1f731b47845d845f6c5e16a3d9 +0 -0
- llama_cpp_python-0.2.37/.git/objects/7d/6c970483161eaf43cfa9d50010c071d4953053 +0 -0
- llama_cpp_python-0.2.37/.git/objects/7e/df0975dc12ccc95ad14de085f07efe6d65c620 +0 -3
- llama_cpp_python-0.2.37/.git/objects/7e/e8f748eab47180cea09c0ad8e75c3b991b4af4 +0 -0
- llama_cpp_python-0.2.37/.git/objects/80/7b0f57a8a873e58ade0ff0f5b0bcf0ff66b7f9 +0 -0
- llama_cpp_python-0.2.37/.git/objects/82/d901c150002ef7bc17c9d77f2c5e3a2fed1240 +0 -0
- llama_cpp_python-0.2.37/.git/objects/82/e5c4487e57d6d59c901a73bdd2a9bc172fee7c +0 -3
- llama_cpp_python-0.2.37/.git/objects/8b/d03f88a1895cbf3ef249e13df79ee0efda779d +0 -2
- llama_cpp_python-0.2.37/.git/objects/91/abb11fdf507883caeeb2d2958e1c65fb6cbdc1 +0 -0
- llama_cpp_python-0.2.37/.git/objects/99/32d6130f5552a9b85c8b15b4ac6bc26b1068ce +0 -0
- llama_cpp_python-0.2.37/.git/objects/9a/fff3d0aa6ccce179543174cc7025807b87de08 +0 -0
- llama_cpp_python-0.2.37/.git/objects/9d/0ec2f705618e591cfa8d6512cb9a96b3da75f1 +0 -0
- llama_cpp_python-0.2.37/.git/objects/9d/3d3559849603efda6f3c8181684e4d19e0ec79 +0 -0
- llama_cpp_python-0.2.37/.git/objects/9e/9870a52245d4f245df5a4e1b89fda121d78214 +0 -0
- llama_cpp_python-0.2.37/.git/objects/9f/e1a7bfd96185d4dee4384ff2508682cd0d9ad6 +0 -0
- llama_cpp_python-0.2.37/.git/objects/a4/5e5d77363eb85144d4aff1a3cbe86ce94d5c92 +0 -0
- llama_cpp_python-0.2.37/.git/objects/a9/e51cdc1672134ec9af66c9eccf09f6da4ceccd +0 -0
- llama_cpp_python-0.2.37/.git/objects/ac/82b8fbb81087ec9b3a72d9e377102a31b28d16 +0 -0
- llama_cpp_python-0.2.37/.git/objects/b1/f90b91594f496ad9f27b1a68584984f4b523d2 +0 -0
- llama_cpp_python-0.2.37/.git/objects/b5/618c10dccf35deb0e69b4df9bc582710f84d07 +0 -0
- llama_cpp_python-0.2.37/.git/objects/b9/1632f5bc787c1404600c894a6a4126359747d8 +0 -0
- llama_cpp_python-0.2.37/.git/objects/bb/b68069d5f8125d64af0baffbd4695f1a0f729c +0 -0
- llama_cpp_python-0.2.37/.git/objects/bb/cbbe7d61558adde3cbfd0c7a63a67c27ed6d30 +0 -0
- llama_cpp_python-0.2.37/.git/objects/bf/9e824922a3fa95b336ad441eca7e42f9b33358 +0 -0
- llama_cpp_python-0.2.37/.git/objects/c1/0aee42e0da547428df7cac9845e246badf1803 +0 -0
- llama_cpp_python-0.2.37/.git/objects/c3/deba87b41117da6624f6d92a016ce50239dfc3 +0 -0
- llama_cpp_python-0.2.37/.git/objects/c8/5e73d2b657bb05ed99309615d67bac93d9f86e +0 -1
- llama_cpp_python-0.2.37/.git/objects/ca/ebbb67fdb02a0a8897d4e4826ea046a9931f6f +0 -0
- llama_cpp_python-0.2.37/.git/objects/cb/221880a66e3c1f2ca15a9df52ac4bcb765e7d4 +0 -0
- llama_cpp_python-0.2.37/.git/objects/cc/6a3a7252ea6e698614f0629d4bc040ab6ca717 +0 -0
- llama_cpp_python-0.2.37/.git/objects/cd/351ba33849dcf6af35b493f7405962fa1625d4 +0 -0
- llama_cpp_python-0.2.37/.git/objects/d1/ae9b564d3ab02c6b91162e52d822d36524edb3 +0 -0
- llama_cpp_python-0.2.37/.git/objects/d3/329eec3bac6ce7e54c76b77ac9bf99fab0fe3f +0 -0
- llama_cpp_python-0.2.37/.git/objects/d3/efb3a6fe1e001db62ec08e5d31ce1d08567045 +0 -0
- llama_cpp_python-0.2.37/.git/objects/d8/ef563c2dc69fe1ea223be7bc5b0efc27ad1f9c +0 -0
- llama_cpp_python-0.2.37/.git/objects/da/c33b74dddf06fcfc01244044eebb102cfcea37 +0 -0
- llama_cpp_python-0.2.37/.git/objects/e1/92a677cd79beca2909e667f7e22044ff1f700b +0 -0
- llama_cpp_python-0.2.37/.git/objects/e2/13518b95011cb6ee783986624c3b6de8659f81 +0 -0
- llama_cpp_python-0.2.37/.git/objects/e4/1f375774e6945e445bfb179502b128fe22dda7 +0 -0
- llama_cpp_python-0.2.37/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
- llama_cpp_python-0.2.37/.git/objects/eb/0fb9662e690d0f9de4632cddd321b3f872a725 +0 -0
- llama_cpp_python-0.2.37/.git/objects/eb/9a2cfa9167df02f136502af79738c71363abfd +0 -0
- llama_cpp_python-0.2.37/.git/objects/ef/1b2c0162e8edd321e2b9c1ce375d96f1f1d048 +0 -0
- llama_cpp_python-0.2.37/.git/objects/f0/827d762e852a21f6406c469300899d5f509b8f +0 -0
- llama_cpp_python-0.2.37/.git/objects/f1/b8e9d154231932c4b7b9b59611626764e68632 +0 -0
- llama_cpp_python-0.2.37/.git/objects/f2/0813765a70679e8a063871c9ef75d75c65ccb6 +0 -0
- llama_cpp_python-0.2.37/.git/objects/fa/dfc5fb4fe6f5eb6d5d98b62519e374a5202b00 +0 -0
- llama_cpp_python-0.2.37/.git/objects/fc/25ff5160028dee3570249abc40cd57780bcca9 +0 -0
- llama_cpp_python-0.2.37/.git/objects/fc/ef8cd800ee8a265b146748d178cb56b5632bf3 +0 -0
- llama_cpp_python-0.2.37/.git/objects/fd/64c09b37947c97e58903ce570785c657d56722 +0 -0
- llama_cpp_python-0.2.37/.git/objects/fe/b0ed68d94eac48b844fd587ddfb808649716a1 +0 -1
- llama_cpp_python-0.2.37/.git/objects/ff/1484c2b966abc1ca2625fdae6a4fb52b8226f0 +0 -0
- llama_cpp_python-0.2.37/.git/objects/ff/3e950cd1110fe552912cea4c268c4023d2b737 +0 -3
- llama_cpp_python-0.2.37/.git/refs/tags/v0.2.37 +0 -1
- llama_cpp_python-0.2.37/.git/shallow +0 -1
- llama_cpp_python-0.2.37/vendor/llama.cpp/.devops/main-intel.Dockerfile +0 -26
- llama_cpp_python-0.2.37/vendor/llama.cpp/.devops/server-intel.Dockerfile +0 -25
- llama_cpp_python-0.2.37/vendor/llama.cpp/README_sycl.md +0 -252
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.dockerignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/description +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/info/exclude +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/description +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/info/exclude +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.git/modules/vendor/llama.cpp/refs/remotes/origin/HEAD +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.github/dependabot.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.github/workflows/build-and-release.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.github/workflows/build-docker.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.github/workflows/publish-to-test.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.github/workflows/publish.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.github/workflows/test-pypi.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.github/workflows/test.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.gitmodules +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/.readthedocs.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/LICENSE.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/Makefile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docker/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docker/cuda_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docker/open_llama/Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docker/open_llama/build.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docker/open_llama/hug_model.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docker/open_llama/start.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docker/open_llama/start_server.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docker/openblas_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docker/simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docker/simple/run.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docs/api-reference.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docs/changelog.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docs/index.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docs/install/macos.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docs/requirements.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/docs/server.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/high_level_api/high_level_api_embedding.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/high_level_api/high_level_api_inference.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/high_level_api/high_level_api_streaming.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/high_level_api/langchain_custom_llm.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/low_level_api/Chat.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/low_level_api/Miku.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/low_level_api/ReasonAct.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/low_level_api/common.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/low_level_api/low_level_api_chat_cpp.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/low_level_api/low_level_api_llama_cpp.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/low_level_api/quantize.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/low_level_api/readme/low_level_api_llama_cpp.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/low_level_api/util.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/notebooks/Batching.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/notebooks/Clients.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/notebooks/Functions.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/notebooks/Guidance.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/notebooks/Multimodal.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/examples/notebooks/PerformanceTuning.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/_utils.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/llama_cache.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/llama_grammar.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/llama_types.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/llava_cpp.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/py.typed +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/server/__init__.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/server/__main__.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/server/app.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/server/cli.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/server/errors.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/llama_cpp/server/types.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/mkdocs.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/pyproject.toml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/tests/test_grammar.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/tests/test_llama.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/tests/test_llama_chat_format.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.clang-tidy +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/cloud-v-pipeline +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/full-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/full-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/main-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/main-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/nix/apps.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/nix/devshells.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/nix/jetson-support.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/nix/nixpkgs-instances.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/nix/scope.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/server-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/server-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/server.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.devops/tools.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.dockerignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.ecrc +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.editorconfig +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.flake8 +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.git +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/enhancement.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/code-coverage.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/docker.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/gguf-publish.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/nix-ci.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/nix-flake-update.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/python-check-requirements.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/python-lint.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/tidy-post.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/tidy-review.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.github/workflows/zig-build.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.gitmodules +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/.pre-commit-config.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/LICENSE +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/Package.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/SHA256SUMS +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/awq-py/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/awq-py/awq/apply_awq.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/awq-py/requirements.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/build.zig +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ci/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ci/run.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/cmake/FindSIMD.cmake +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/codecov.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/base64.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/build-info.cpp.in +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/console.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/console.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/grammar-parser.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/log.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/sampling.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/sampling.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/stb_image.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/common/train.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/convert-llama-ggml-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/convert-lora-to-ggml.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/convert-persimmon-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/docs/BLIS.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/docs/llama-star/idea-arch.key +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/docs/llama-star/idea-arch.pdf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/docs/token_generation_performance_tips.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/Miku.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/alpaca.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/base-translate.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched/batched.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched-bench/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched.swift/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched.swift/Makefile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched.swift/Package.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched.swift/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/batched.swift/Sources/main.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/beam-search/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/beam-search/beam-search.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/benchmark/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/chat-13B.bat +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/chat-13B.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/chat-persistent.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/chat-vicuna.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/chat.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/embedding/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/embedding/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/embedding/embedding.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/export-lora/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/export-lora/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/export-lora/export-lora.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/finetune/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/finetune/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/finetune/convert-finetune-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/finetune/finetune.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/finetune/finetune.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/gguf/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/gguf/gguf.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/gpt4all.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/imatrix/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/imatrix/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/infill/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/infill/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/infill/infill.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/jeopardy/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/jeopardy/graph.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/jeopardy/jeopardy.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/jeopardy/qasheet.csv +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/jeopardy/questions.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/json-schema-to-grammar.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/proguard-rules.pro +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/AndroidManifest.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/Downloadable.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/MainActivity.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/MainViewModel.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Color.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Theme.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Type.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/drawable/ic_launcher_background.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/drawable/ic_launcher_foreground.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-anydpi/ic_launcher.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-hdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-mdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/values/colors.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/values/strings.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/values/themes.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/xml/backup_rules.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/app/src/main/res/xml/data_extraction_rules.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/build.gradle.kts +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/gradle/wrapper/gradle-wrapper.jar +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/gradle/wrapper/gradle-wrapper.properties +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/gradle.properties +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/gradlew +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.android/settings.gradle.kts +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/InputButton.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama.vim +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama2-13b.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llama2.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/android/adb_run.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/android/build_64.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/clip.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/clip.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/convert-image-encoder-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/llava-cli.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/llava-surgery.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/llava.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llava/llava.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/llm.vim +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/lookahead/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/lookahead/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/lookahead/lookahead.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/lookup/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/lookup/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/lookup/lookup.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/main/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/main/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/main-cmake-pkg/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/main-cmake-pkg/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/make-ggml.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/parallel/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/parallel/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/parallel/parallel.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/passkey/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/passkey/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/passkey/passkey.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/perplexity/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/perplexity/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/pydantic-models-to-grammar-examples.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/pydantic_models_to_grammar.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/quantize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/quantize/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/reason-act.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/api_like_OAI.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/chat-llama2.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/chat.mjs +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/completion.js.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/deps.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/httplib.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/index.html.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/index.js.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/json-schema-to-grammar.mjs.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/json.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/oai.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/public/completion.js +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/public/index.html +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/public/index.js +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/public/json-schema-to-grammar.mjs +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server/utils.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/server-llama2-13B.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/simple/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/simple/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/simple/simple.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/speculative/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/speculative/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/speculative/speculative.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/sycl/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/sycl/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/sycl/build.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/sycl/run-llama2.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/tokenize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/tokenize/tokenize.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/train-text-from-scratch/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-alloc.c +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-alloc.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-backend-impl.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-backend.c +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-backend.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-cuda.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-kompute.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-kompute.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-metal.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-mpi.c +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-mpi.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-opencl.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-opencl.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/ggml-vulkan.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/LICENSE +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/examples/writer.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/gguf/__init__.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/gguf/gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/gguf/gguf_reader.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/gguf/py.typed +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/gguf/vocab.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/pyproject.toml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/scripts/__init__.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/scripts/gguf-convert-endian.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/scripts/gguf-dump.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/scripts/gguf-set-metadata.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/gguf-py/tests/test_gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/grammars/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/grammars/arithmetic.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/grammars/c.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/grammars/chess.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/grammars/japanese.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/grammars/json.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/grammars/json_arr.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/grammars/list.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/common.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_add.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_addrow.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_cpy_f16_f16.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_cpy_f16_f32.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_cpy_f32_f16.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_cpy_f32_f32.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_diagmask.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_gelu.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_getrows.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_getrows_f16.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_getrows_q4_0.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_getrows_q4_1.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_getrows_q6_k.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_mul.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_mul_mat_f16.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_mul_mat_mat_f32.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_mul_mat_q4_0.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_mul_mat_q4_1.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_mul_mat_q6_k.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_mul_mat_q8_0.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_mul_mv_q_n.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_norm.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_relu.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_rmsnorm.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_rope_f16.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_rope_f32.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_scale.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_scale_8.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_silu.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/op_softmax.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/kompute-shaders/rope_common.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/media/llama-leader.jpeg +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/media/llama0-banner.png +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/media/llama0-logo.png +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/media/llama1-banner.png +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/media/llama1-logo.png +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/mypy.ini +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/pocs/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/pocs/vdot/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/pocs/vdot/q8dot.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/pocs/vdot/vdot.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/LLM-questions.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/assistant.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/chat-with-baichuan.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/chat-with-qwen.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/chat.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/dan-modified.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/dan.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/mnemonics.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/parallel-questions.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/prompts/reason-act.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/requirements/requirements-convert.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/requirements.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/LlamaConfig.cmake.in +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/build-info.cmake +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/build-info.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/check-requirements.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/ci-run.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/compare-llama-bench.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/convert-gg.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/get-flags.mk +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/get-hellaswag.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/get-pg.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/get-wikitext-2.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/get-winogrande.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/qnt-all.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/run-all-perf.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/run-all-ppl.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/run-with-preset.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/sync-ggml-am.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/sync-ggml.last +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/sync-ggml.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/scripts/verify-checksum-models.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/spm-headers/llama.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/get-model.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/get-model.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-autorelease.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-c.c +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-double-float.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-grad0.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-model-load-cancel.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-opt.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-quantize-fns.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-quantize-perf.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-rope.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-sampling.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-tokenizer-0-llama.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/tests/test-tokenizer-1-llama.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.39}/vendor/llama.cpp/unicode.h +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
34f31040f610925552a66b3a033e31320b6f6ad8 '34f31040f610925552a66b3a033e31320b6f6ad8' of https://github.com/abetlen/llama-cpp-python
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
34f31040f610925552a66b3a033e31320b6f6ad8
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
[gc]
|
|
10
10
|
auto = 0
|
|
11
11
|
[http "https://github.com/"]
|
|
12
|
-
extraheader = AUTHORIZATION: basic
|
|
12
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX3J2Wk5WNnkzdklaWVhQWDVqc2F4NXZUUTJQdXpUcDRCUXVHSQ==
|
|
13
13
|
[submodule "vendor/llama.cpp"]
|
|
14
14
|
active = true
|
|
15
15
|
url = https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 34f31040f610925552a66b3a033e31320b6f6ad8 runner <runner@fv-az1019-910.m5ch4o5xfz2e5czaqtqza4205h.dx.internal.cloudapp.net> 1707241738 +0000 checkout: moving from master to refs/tags/v0.2.39
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
b08f22c882a1443e6b97081f3ce718a4d1a741f8 'b08f22c882a1443e6b97081f3ce718a4d1a741f8' of https://github.com/ggerganov/llama.cpp
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
b08f22c882a1443e6b97081f3ce718a4d1a741f8
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
[gc]
|
|
14
14
|
auto = 0
|
|
15
15
|
[http "https://github.com/"]
|
|
16
|
-
extraheader = AUTHORIZATION: basic
|
|
16
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX3J2Wk5WNnkzdklaWVhQWDVqc2F4NXZUUTJQdXpUcDRCUXVHSQ==
|
|
17
17
|
[url "https://github.com/"]
|
|
18
18
|
insteadOf = git@github.com:
|
|
19
19
|
insteadOf = org-6826477@github.com:
|
|
Binary file
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 17c97fb0620448b37516a3f53fea6c482b0a30a4 runner <runner@fv-az1019-910.m5ch4o5xfz2e5czaqtqza4205h.dx.internal.cloudapp.net> 1707241739 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
2
|
+
17c97fb0620448b37516a3f53fea6c482b0a30a4 b08f22c882a1443e6b97081f3ce718a4d1a741f8 runner <runner@fv-az1019-910.m5ch4o5xfz2e5czaqtqza4205h.dx.internal.cloudapp.net> 1707241741 +0000 checkout: moving from master to b08f22c882a1443e6b97081f3ce718a4d1a741f8
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 17c97fb0620448b37516a3f53fea6c482b0a30a4 runner <runner@fv-az1019-910.m5ch4o5xfz2e5czaqtqza4205h.dx.internal.cloudapp.net> 1707241739 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 17c97fb0620448b37516a3f53fea6c482b0a30a4 runner <runner@fv-az1019-910.m5ch4o5xfz2e5czaqtqza4205h.dx.internal.cloudapp.net> 1707241739 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
17c97fb0620448b37516a3f53fea6c482b0a30a4
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
34f31040f610925552a66b3a033e31320b6f6ad8
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
34f31040f610925552a66b3a033e31320b6f6ad8
|
|
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.39]
|
|
11
|
+
|
|
12
|
+
- feat: Update llama.cpp to ggerganov/llama.cpp@b08f22c882a1443e6b97081f3ce718a4d1a741f8
|
|
13
|
+
- fix: Fix destructor logging bugs by using llama_log_callback to avoid suppress_stdout_stderr by @abetlen in 59760c85eddc72dfcc1839f43760ef72c23d6874
|
|
14
|
+
|
|
15
|
+
## [0.2.38]
|
|
16
|
+
|
|
17
|
+
- feat: Update llama.cpp to ggerganov/llama.cpp@1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
18
|
+
- feat: Add speculative decoding by @abetlen in #1120
|
|
19
|
+
- fix: Pass raise_exception and add_generation_prompt to jinja2 chat template by @abetlen in 078cca0361bf5a94d2cf52ed04980d20e32d6f95
|
|
20
|
+
|
|
10
21
|
## [0.2.37]
|
|
11
22
|
|
|
12
23
|
- feat: Update llama.cpp to ggerganov/llama.cpp@fea4fd4ba7f6b754ac795387b275e1a014a77bde
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: llama_cpp_python
|
|
3
|
-
Version: 0.2.37
|
|
3
|
+
Version: 0.2.39
|
|
4
4
|
Summary: Python bindings for the llama.cpp library
|
|
5
5
|
Author-Email: Andrei Betlen <abetlen@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -421,6 +421,24 @@ Then you'll need to use a custom chat handler to load the clip model and process
|
|
|
421
421
|
)
|
|
422
422
|
```
|
|
423
423
|
|
|
424
|
+
### Speculative Decoding
|
|
425
|
+
|
|
426
|
+
`llama-cpp-python` supports speculative decoding which allows the model to generate completions based on a draft model.
|
|
427
|
+
|
|
428
|
+
The fastest way to use speculative decoding is through the `LlamaPromptLookupDecoding` class.
|
|
429
|
+
|
|
430
|
+
Just pass this as a draft model to the `Llama` class during initialization.
|
|
431
|
+
|
|
432
|
+
```python
|
|
433
|
+
from llama_cpp import Llama
|
|
434
|
+
from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
|
|
435
|
+
|
|
436
|
+
llama = Llama(
|
|
437
|
+
model_path="path/to/model.gguf",
|
|
438
|
+
draft_model=LlamaPromptLookupDecoding(num_pred_tokens=10) # num_pred_tokens is the number of tokens to predict 10 is the default and generally good for gpu, 2 performs better for cpu-only machines.
|
|
439
|
+
)
|
|
440
|
+
```
|
|
441
|
+
|
|
424
442
|
### Adjusting the Context Window
|
|
425
443
|
|
|
426
444
|
The context window of the Llama models determines the maximum number of tokens that can be processed at once. By default, this is set to 512 tokens, but can be adjusted based on your requirements.
|
|
@@ -378,6 +378,24 @@ Then you'll need to use a custom chat handler to load the clip model and process
|
|
|
378
378
|
)
|
|
379
379
|
```
|
|
380
380
|
|
|
381
|
+
### Speculative Decoding
|
|
382
|
+
|
|
383
|
+
`llama-cpp-python` supports speculative decoding which allows the model to generate completions based on a draft model.
|
|
384
|
+
|
|
385
|
+
The fastest way to use speculative decoding is through the `LlamaPromptLookupDecoding` class.
|
|
386
|
+
|
|
387
|
+
Just pass this as a draft model to the `Llama` class during initialization.
|
|
388
|
+
|
|
389
|
+
```python
|
|
390
|
+
from llama_cpp import Llama
|
|
391
|
+
from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
|
|
392
|
+
|
|
393
|
+
llama = Llama(
|
|
394
|
+
model_path="path/to/model.gguf",
|
|
395
|
+
draft_model=LlamaPromptLookupDecoding(num_pred_tokens=10) # num_pred_tokens is the number of tokens to predict 10 is the default and generally good for gpu, 2 performs better for cpu-only machines.
|
|
396
|
+
)
|
|
397
|
+
```
|
|
398
|
+
|
|
381
399
|
### Adjusting the Context Window
|
|
382
400
|
|
|
383
401
|
The context window of the Llama models determines the maximum number of tokens that can be processed at once. By default, this is set to 512 tokens, but can be adjusted based on your requirements.
|
|
@@ -18,8 +18,6 @@ from .llama_grammar import LlamaGrammar
|
|
|
18
18
|
|
|
19
19
|
import llama_cpp.llama_cpp as llama_cpp
|
|
20
20
|
|
|
21
|
-
from ._utils import suppress_stdout_stderr
|
|
22
|
-
|
|
23
21
|
|
|
24
22
|
# Python wrappers over llama.h structs
|
|
25
23
|
|
|
@@ -30,7 +28,6 @@ class _LlamaModel:
|
|
|
30
28
|
|
|
31
29
|
_llama_free_model = None
|
|
32
30
|
# NOTE: this must be "saved" here to avoid exceptions when calling __del__
|
|
33
|
-
_suppress_stdout_stderr = suppress_stdout_stderr
|
|
34
31
|
|
|
35
32
|
def __init__(
|
|
36
33
|
self,
|
|
@@ -48,16 +45,14 @@ class _LlamaModel:
|
|
|
48
45
|
if not os.path.exists(path_model):
|
|
49
46
|
raise ValueError(f"Model path does not exist: {path_model}")
|
|
50
47
|
|
|
51
|
-
|
|
52
|
-
self.
|
|
53
|
-
|
|
54
|
-
)
|
|
48
|
+
self.model = llama_cpp.llama_load_model_from_file(
|
|
49
|
+
self.path_model.encode("utf-8"), self.params
|
|
50
|
+
)
|
|
55
51
|
|
|
56
52
|
def __del__(self):
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
self.model = None
|
|
53
|
+
if self.model is not None and self._llama_free_model is not None:
|
|
54
|
+
self._llama_free_model(self.model)
|
|
55
|
+
self.model = None
|
|
61
56
|
|
|
62
57
|
def vocab_type(self) -> int:
|
|
63
58
|
assert self.model is not None
|
|
@@ -240,8 +235,6 @@ class _LlamaContext:
|
|
|
240
235
|
NOTE: For stability it's recommended you use the Llama class instead."""
|
|
241
236
|
|
|
242
237
|
_llama_free = None
|
|
243
|
-
# NOTE: this must be "saved" here to avoid exceptions when calling __del__
|
|
244
|
-
_suppress_stdout_stderr = suppress_stdout_stderr
|
|
245
238
|
|
|
246
239
|
def __init__(
|
|
247
240
|
self,
|
|
@@ -256,16 +249,16 @@ class _LlamaContext:
|
|
|
256
249
|
|
|
257
250
|
self._llama_free = llama_cpp._lib.llama_free # type: ignore
|
|
258
251
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
252
|
+
assert self.model.model is not None
|
|
253
|
+
|
|
254
|
+
self.ctx = llama_cpp.llama_new_context_with_model(
|
|
255
|
+
self.model.model, self.params
|
|
256
|
+
)
|
|
263
257
|
|
|
264
258
|
def __del__(self):
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
self.ctx = None
|
|
259
|
+
if self.ctx is not None and self._llama_free is not None:
|
|
260
|
+
self._llama_free(self.ctx)
|
|
261
|
+
self.ctx = None
|
|
269
262
|
|
|
270
263
|
def n_ctx(self) -> int:
|
|
271
264
|
assert self.ctx is not None
|
|
@@ -493,8 +486,6 @@ class _LlamaContext:
|
|
|
493
486
|
|
|
494
487
|
class _LlamaBatch:
|
|
495
488
|
_llama_batch_free = None
|
|
496
|
-
# NOTE: this must be "saved" here to avoid exceptions when calling __del__
|
|
497
|
-
_suppress_stdout_stderr = suppress_stdout_stderr
|
|
498
489
|
|
|
499
490
|
def __init__(
|
|
500
491
|
self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True
|
|
@@ -506,16 +497,14 @@ class _LlamaBatch:
|
|
|
506
497
|
|
|
507
498
|
self._llama_batch_free = llama_cpp._lib.llama_batch_free # type: ignore
|
|
508
499
|
|
|
509
|
-
|
|
510
|
-
self.
|
|
511
|
-
|
|
512
|
-
)
|
|
500
|
+
self.batch = llama_cpp.llama_batch_init(
|
|
501
|
+
self.n_tokens, self.embd, self.n_seq_max
|
|
502
|
+
)
|
|
513
503
|
|
|
514
504
|
def __del__(self):
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
self.batch = None
|
|
505
|
+
if self.batch is not None and self._llama_batch_free is not None:
|
|
506
|
+
self._llama_batch_free(self.batch)
|
|
507
|
+
self.batch = None
|
|
519
508
|
|
|
520
509
|
def set_batch(self, batch: Sequence[int], n_past: int, logits_all: bool):
|
|
521
510
|
assert self.batch is not None
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import ctypes
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
import llama_cpp
|
|
6
|
+
|
|
7
|
+
# enum ggml_log_level {
|
|
8
|
+
# GGML_LOG_LEVEL_ERROR = 2,
|
|
9
|
+
# GGML_LOG_LEVEL_WARN = 3,
|
|
10
|
+
# GGML_LOG_LEVEL_INFO = 4,
|
|
11
|
+
# GGML_LOG_LEVEL_DEBUG = 5
|
|
12
|
+
# };
|
|
13
|
+
GGML_LOG_LEVEL_TO_LOGGING_LEVEL = {
|
|
14
|
+
2: logging.ERROR,
|
|
15
|
+
3: logging.WARNING,
|
|
16
|
+
4: logging.INFO,
|
|
17
|
+
5: logging.DEBUG,
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger("llama-cpp-python")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@llama_cpp.llama_log_callback
|
|
24
|
+
def llama_log_callback(
|
|
25
|
+
level: int,
|
|
26
|
+
text: bytes,
|
|
27
|
+
user_data: ctypes.c_void_p,
|
|
28
|
+
):
|
|
29
|
+
if logger.level <= GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level]:
|
|
30
|
+
print(text.decode("utf-8"), end="", flush=True, file=sys.stderr)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
llama_cpp.llama_log_set(llama_log_callback, ctypes.c_void_p(0))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def set_verbose(verbose: bool):
|
|
37
|
+
logger.setLevel(logging.DEBUG if verbose else logging.ERROR)
|
|
@@ -30,16 +30,20 @@ from .llama_cache import (
|
|
|
30
30
|
import llama_cpp.llama_cpp as llama_cpp
|
|
31
31
|
import llama_cpp.llama_chat_format as llama_chat_format
|
|
32
32
|
|
|
33
|
+
from llama_cpp.llama_speculative import LlamaDraftModel
|
|
34
|
+
|
|
33
35
|
import numpy as np
|
|
34
36
|
import numpy.typing as npt
|
|
35
37
|
|
|
36
|
-
from ._utils import suppress_stdout_stderr
|
|
37
38
|
from ._internals import (
|
|
38
39
|
_LlamaModel, # type: ignore
|
|
39
40
|
_LlamaContext, # type: ignore
|
|
40
41
|
_LlamaBatch, # type: ignore
|
|
41
42
|
_LlamaTokenDataArray, # type: ignore
|
|
43
|
+
_LlamaSamplingParams, # type: ignore
|
|
44
|
+
_LlamaSamplingContext, # type: ignore
|
|
42
45
|
)
|
|
46
|
+
from ._logger import set_verbose
|
|
43
47
|
|
|
44
48
|
|
|
45
49
|
class Llama:
|
|
@@ -89,6 +93,8 @@ class Llama:
|
|
|
89
93
|
# Chat Format Params
|
|
90
94
|
chat_format: Optional[str] = None,
|
|
91
95
|
chat_handler: Optional[llama_chat_format.LlamaChatCompletionHandler] = None,
|
|
96
|
+
# Speculative Decoding
|
|
97
|
+
draft_model: Optional[LlamaDraftModel] = None,
|
|
92
98
|
# Misc
|
|
93
99
|
verbose: bool = True,
|
|
94
100
|
# Extra Params
|
|
@@ -152,6 +158,7 @@ class Llama:
|
|
|
152
158
|
numa: Enable NUMA support. (NOTE: The initial value of this parameter is used for the remainder of the program as this value is set in llama_backend_init)
|
|
153
159
|
chat_format: String specifying the chat format to use when calling create_chat_completion.
|
|
154
160
|
chat_handler: Optional chat handler to use when calling create_chat_completion.
|
|
161
|
+
draft_model: Optional draft model to use for speculative decoding.
|
|
155
162
|
verbose: Print verbose output to stderr.
|
|
156
163
|
|
|
157
164
|
Raises:
|
|
@@ -162,10 +169,11 @@ class Llama:
|
|
|
162
169
|
"""
|
|
163
170
|
self.verbose = verbose
|
|
164
171
|
|
|
172
|
+
set_verbose(verbose)
|
|
173
|
+
|
|
165
174
|
self.numa = numa
|
|
166
175
|
if not Llama.__backend_initialized:
|
|
167
|
-
|
|
168
|
-
llama_cpp.llama_backend_init(self.numa)
|
|
176
|
+
llama_cpp.llama_backend_init(self.numa)
|
|
169
177
|
Llama.__backend_initialized = True
|
|
170
178
|
|
|
171
179
|
self.model_path = model_path
|
|
@@ -315,6 +323,8 @@ class Llama:
|
|
|
315
323
|
self.chat_format = chat_format
|
|
316
324
|
self.chat_handler = chat_handler
|
|
317
325
|
|
|
326
|
+
self.draft_model = draft_model
|
|
327
|
+
|
|
318
328
|
self._n_vocab = self.n_vocab()
|
|
319
329
|
self._n_ctx = self.n_ctx()
|
|
320
330
|
|
|
@@ -503,6 +513,7 @@ class Llama:
|
|
|
503
513
|
penalize_nl: bool = True,
|
|
504
514
|
logits_processor: Optional[LogitsProcessorList] = None,
|
|
505
515
|
grammar: Optional[LlamaGrammar] = None,
|
|
516
|
+
idx: Optional[int] = None,
|
|
506
517
|
):
|
|
507
518
|
"""Sample a token from the model.
|
|
508
519
|
|
|
@@ -517,77 +528,46 @@ class Llama:
|
|
|
517
528
|
"""
|
|
518
529
|
assert self._ctx is not None
|
|
519
530
|
assert self.n_tokens > 0
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
n_ctx = self._n_ctx
|
|
526
|
-
top_k = n_vocab if top_k <= 0 else top_k
|
|
527
|
-
last_n_tokens_size = n_ctx if last_n_tokens_size < 0 else last_n_tokens_size
|
|
528
|
-
last_n_tokens_data_c = (llama_cpp.llama_token * last_n_tokens_size)(
|
|
529
|
-
*last_n_tokens_data
|
|
530
|
-
)
|
|
531
|
-
logits: npt.NDArray[np.single] = self._scores[-1, :]
|
|
531
|
+
|
|
532
|
+
if idx is None:
|
|
533
|
+
logits: npt.NDArray[np.single] = self._scores[-1, :]
|
|
534
|
+
else:
|
|
535
|
+
logits = self._scores[idx, :]
|
|
532
536
|
|
|
533
537
|
if logits_processor is not None:
|
|
534
|
-
logits[:] =
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
538
|
+
logits[:] = (
|
|
539
|
+
logits_processor(self._input_ids, logits)
|
|
540
|
+
if idx is None
|
|
541
|
+
else logits_processor(self._input_ids[:idx], logits)
|
|
542
|
+
)
|
|
543
|
+
|
|
544
|
+
sampling_params = _LlamaSamplingParams(
|
|
545
|
+
top_k=top_k,
|
|
546
|
+
top_p=top_p,
|
|
547
|
+
min_p=min_p,
|
|
548
|
+
tfs_z=tfs_z,
|
|
549
|
+
typical_p=typical_p,
|
|
550
|
+
temp=temp,
|
|
551
|
+
penalty_last_n=self.last_n_tokens_size,
|
|
542
552
|
penalty_repeat=repeat_penalty,
|
|
543
553
|
penalty_freq=frequency_penalty,
|
|
544
554
|
penalty_present=presence_penalty,
|
|
555
|
+
mirostat=mirostat_mode,
|
|
556
|
+
mirostat_tau=mirostat_tau,
|
|
557
|
+
mirostat_eta=mirostat_eta,
|
|
558
|
+
penalize_nl=penalize_nl,
|
|
559
|
+
)
|
|
560
|
+
sampling_context = _LlamaSamplingContext(
|
|
561
|
+
params=sampling_params,
|
|
562
|
+
grammar=grammar,
|
|
563
|
+
)
|
|
564
|
+
sampling_context.prev = list(self.eval_tokens)
|
|
565
|
+
id = sampling_context.sample(ctx_main=self._ctx, logits_array=logits)
|
|
566
|
+
sampling_context.accept(
|
|
567
|
+
ctx_main=self._ctx,
|
|
568
|
+
id=id,
|
|
569
|
+
apply_grammar=grammar is not None,
|
|
545
570
|
)
|
|
546
|
-
if not penalize_nl:
|
|
547
|
-
self._candidates.candidates.data[self._token_nl].logit = llama_cpp.c_float(
|
|
548
|
-
nl_logit
|
|
549
|
-
)
|
|
550
|
-
|
|
551
|
-
if grammar is not None:
|
|
552
|
-
self._ctx.sample_grammar(
|
|
553
|
-
candidates=self._candidates,
|
|
554
|
-
grammar=grammar,
|
|
555
|
-
)
|
|
556
|
-
|
|
557
|
-
if temp < 0.0:
|
|
558
|
-
self._ctx.sample_softmax(candidates=self._candidates)
|
|
559
|
-
id = self._candidates.candidates.data[0].id
|
|
560
|
-
elif temp == 0.0:
|
|
561
|
-
id = self._ctx.sample_token_greedy(candidates=self._candidates)
|
|
562
|
-
elif mirostat_mode == 1:
|
|
563
|
-
self._ctx.sample_temp(candidates=self._candidates, temp=temp)
|
|
564
|
-
id = self._ctx.sample_token_mirostat(
|
|
565
|
-
candidates=self._candidates,
|
|
566
|
-
tau=mirostat_tau,
|
|
567
|
-
eta=mirostat_eta,
|
|
568
|
-
mu=ctypes.pointer(self._mirostat_mu),
|
|
569
|
-
m=100,
|
|
570
|
-
)
|
|
571
|
-
elif mirostat_mode == 2:
|
|
572
|
-
self._ctx.sample_temp(candidates=self._candidates, temp=temp)
|
|
573
|
-
id = self._ctx.sample_token_mirostat_v2(
|
|
574
|
-
candidates=self._candidates,
|
|
575
|
-
tau=mirostat_tau,
|
|
576
|
-
eta=mirostat_eta,
|
|
577
|
-
mu=ctypes.pointer(self._mirostat_mu),
|
|
578
|
-
)
|
|
579
|
-
else:
|
|
580
|
-
self._ctx.sample_top_k(candidates=self._candidates, k=top_k, min_keep=1)
|
|
581
|
-
self._ctx.sample_tail_free(candidates=self._candidates, z=tfs_z, min_keep=1)
|
|
582
|
-
self._ctx.sample_typical(
|
|
583
|
-
candidates=self._candidates, p=typical_p, min_keep=1
|
|
584
|
-
)
|
|
585
|
-
self._ctx.sample_top_p(candidates=self._candidates, p=top_p, min_keep=1)
|
|
586
|
-
self._ctx.sample_min_p(candidates=self._candidates, p=min_p, min_keep=1)
|
|
587
|
-
self._ctx.sample_temp(candidates=self._candidates, temp=temp)
|
|
588
|
-
id = self._ctx.sample_token(candidates=self._candidates)
|
|
589
|
-
if grammar is not None:
|
|
590
|
-
self._ctx.grammar_accept_token(grammar=grammar, token=id)
|
|
591
571
|
return id
|
|
592
572
|
|
|
593
573
|
def generate(
|
|
@@ -656,34 +636,56 @@ class Llama:
|
|
|
656
636
|
if grammar is not None:
|
|
657
637
|
grammar.reset()
|
|
658
638
|
|
|
639
|
+
sample_idx = self.n_tokens + len(tokens) - 1
|
|
640
|
+
tokens = list(tokens)
|
|
641
|
+
|
|
659
642
|
# Eval and sample
|
|
660
643
|
while True:
|
|
661
644
|
self.eval(tokens)
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
645
|
+
while sample_idx < self.n_tokens:
|
|
646
|
+
token = self.sample(
|
|
647
|
+
top_k=top_k,
|
|
648
|
+
top_p=top_p,
|
|
649
|
+
min_p=min_p,
|
|
650
|
+
typical_p=typical_p,
|
|
651
|
+
temp=temp,
|
|
652
|
+
repeat_penalty=repeat_penalty,
|
|
653
|
+
frequency_penalty=frequency_penalty,
|
|
654
|
+
presence_penalty=presence_penalty,
|
|
655
|
+
tfs_z=tfs_z,
|
|
656
|
+
mirostat_mode=mirostat_mode,
|
|
657
|
+
mirostat_tau=mirostat_tau,
|
|
658
|
+
mirostat_eta=mirostat_eta,
|
|
659
|
+
logits_processor=logits_processor,
|
|
660
|
+
grammar=grammar,
|
|
661
|
+
penalize_nl=penalize_nl,
|
|
662
|
+
idx=sample_idx,
|
|
663
|
+
)
|
|
664
|
+
|
|
665
|
+
sample_idx += 1
|
|
666
|
+
if stopping_criteria is not None and stopping_criteria(
|
|
667
|
+
self._input_ids, self._scores[-1, :]
|
|
668
|
+
):
|
|
669
|
+
return
|
|
670
|
+
tokens_or_none = yield token
|
|
671
|
+
tokens.clear()
|
|
672
|
+
tokens.append(token)
|
|
673
|
+
if tokens_or_none is not None:
|
|
674
|
+
tokens.extend(tokens_or_none)
|
|
675
|
+
|
|
676
|
+
if sample_idx < self.n_tokens and token != self._input_ids[sample_idx]:
|
|
677
|
+
self.n_tokens = sample_idx
|
|
678
|
+
self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
|
|
679
|
+
break
|
|
680
|
+
|
|
681
|
+
if self.draft_model is not None:
|
|
682
|
+
self.input_ids[self.n_tokens : self.n_tokens + len(tokens)] = tokens
|
|
683
|
+
draft_tokens = self.draft_model(self.input_ids[:self.n_tokens + len(tokens)])
|
|
684
|
+
tokens.extend(
|
|
685
|
+
draft_tokens.astype(int)[
|
|
686
|
+
: self._n_ctx - self.n_tokens - len(tokens)
|
|
687
|
+
]
|
|
688
|
+
)
|
|
687
689
|
|
|
688
690
|
def create_embedding(
|
|
689
691
|
self, input: Union[str, List[str]], model: Optional[str] = None
|
|
@@ -185,16 +185,17 @@ class Jinja2ChatFormatter(ChatFormatter):
|
|
|
185
185
|
messages: List[llama_types.ChatCompletionRequestMessage],
|
|
186
186
|
**kwargs: Any,
|
|
187
187
|
) -> ChatFormatterResponse:
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
llama_types.ChatCompletionRequestAssistantMessage(
|
|
192
|
-
role="assistant", content=""
|
|
193
|
-
),
|
|
194
|
-
]
|
|
188
|
+
def raise_exception(message: str):
|
|
189
|
+
raise ValueError(message)
|
|
190
|
+
|
|
195
191
|
prompt = self._environment.render(
|
|
196
|
-
messages=messages,
|
|
192
|
+
messages=messages,
|
|
193
|
+
eos_token=self.eos_token,
|
|
194
|
+
bos_token=self.bos_token,
|
|
195
|
+
raise_exception=raise_exception,
|
|
196
|
+
add_generation_prompt=self.add_generation_prompt
|
|
197
197
|
)
|
|
198
|
+
|
|
198
199
|
return ChatFormatterResponse(prompt=prompt, stop=[self.eos_token])
|
|
199
200
|
|
|
200
201
|
def to_chat_handler(self) -> LlamaChatCompletionHandler:
|