llama-cpp-python 0.2.37__tar.gz → 0.2.38__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_cpp_python-0.2.38/.git/FETCH_HEAD +1 -0
- llama_cpp_python-0.2.38/.git/HEAD +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/config +1 -1
- llama_cpp_python-0.2.38/.git/index +0 -0
- llama_cpp_python-0.2.38/.git/logs/HEAD +1 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/HEAD +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/config +1 -1
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/logs/HEAD +2 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/logs/refs/heads/master +1 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +1 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/objects/pack/pack-840f4459d494ce7fd10b79596f309b54b31652b8.idx +0 -0
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/objects/pack/pack-2dab3d9b62e8dec5ea0d7cf3608572059e30b9db.pack → llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/objects/pack/pack-840f4459d494ce7fd10b79596f309b54b31652b8.pack +0 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/objects/pack/pack-840f4459d494ce7fd10b79596f309b54b31652b8.rev +0 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/packed-refs +2 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/refs/heads/master +1 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/shallow +1 -0
- llama_cpp_python-0.2.38/.git/objects/pack/pack-a38d8e19feb0b2901a657ea0b79846878599b3fc.idx +0 -0
- llama_cpp_python-0.2.38/.git/objects/pack/pack-a38d8e19feb0b2901a657ea0b79846878599b3fc.pack +0 -0
- llama_cpp_python-0.2.38/.git/objects/pack/pack-a38d8e19feb0b2901a657ea0b79846878599b3fc.rev +0 -0
- llama_cpp_python-0.2.38/.git/refs/tags/v0.2.38 +1 -0
- llama_cpp_python-0.2.38/.git/shallow +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/CHANGELOG.md +6 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/PKG-INFO +19 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/README.md +18 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/high_level_api/fastapi_server.py +1 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/__init__.py +1 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/llama.py +91 -90
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/llama_chat_format.py +9 -8
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/llama_cpp.py +34 -7
- llama_cpp_python-0.2.38/llama_cpp/llama_speculative.py +64 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/server/model.py +9 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/server/settings.py +9 -0
- llama_cpp_python-0.2.38/tests/test_llama_speculative.py +16 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/build.yml +29 -2
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/editorconfig.yml +6 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.gitignore +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/CMakeLists.txt +7 -7
- llama_cpp_python-0.2.37/vendor/llama.cpp/README_sycl.md → llama_cpp_python-0.2.38/vendor/llama.cpp/README-sycl.md +184 -10
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/README.md +4 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/build-info.cpp +1 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/common.cpp +30 -28
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/common.h +34 -34
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/train.cpp +6 -6
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched-bench/batched-bench.cpp +1 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama-bench/llama-bench.cpp +19 -12
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/MobileVLM-README.md +56 -2
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/chat.sh +1 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/server.cpp +73 -64
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/sycl/ls-sycl-device.cpp +6 -4
- llama_cpp_python-0.2.38/vendor/llama.cpp/examples/sycl/win-build-sycl.bat +23 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/examples/sycl/win-run-llama2.bat +13 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-cuda.cu +192 -17
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-metal.m +10 -3
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-metal.metal +29 -4
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-sycl.cpp +11 -4
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-sycl.h +5 -4
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-vulkan-shaders.hpp +590 -1162
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-vulkan.cpp +11 -3
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml.c +117 -18
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml.h +3 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml_vk_generate_shaders.py +2 -2
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/llama.cpp +154 -133
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/llama.h +10 -19
- llama_cpp_python-0.2.38/vendor/llama.cpp/scripts/install-oneapi.bat +19 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-backend-ops.cpp +69 -5
- llama_cpp_python-0.2.37/.git/FETCH_HEAD +0 -1
- llama_cpp_python-0.2.37/.git/HEAD +0 -1
- llama_cpp_python-0.2.37/.git/index +0 -0
- llama_cpp_python-0.2.37/.git/logs/HEAD +0 -1
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/HEAD +0 -1
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/logs/HEAD +0 -2
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/logs/refs/heads/master +0 -1
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +0 -1
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/objects/pack/pack-2dab3d9b62e8dec5ea0d7cf3608572059e30b9db.idx +0 -0
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/objects/pack/pack-2dab3d9b62e8dec5ea0d7cf3608572059e30b9db.rev +0 -0
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/packed-refs +0 -2
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/refs/heads/master +0 -1
- llama_cpp_python-0.2.37/.git/modules/vendor/llama.cpp/shallow +0 -1
- llama_cpp_python-0.2.37/.git/objects/02/c09afb0bf5559d3fe64ce67f4ff82af32ff50f +0 -0
- llama_cpp_python-0.2.37/.git/objects/03/667ba9b14656308c89e62f61377b295604a99d +0 -0
- llama_cpp_python-0.2.37/.git/objects/04/5856ea2ffe697ec33db1c1c989bd45cde5bb3d +0 -0
- llama_cpp_python-0.2.37/.git/objects/04/7bc14424303575f73af90611fec827334f54e8 +0 -0
- llama_cpp_python-0.2.37/.git/objects/0a/77bbdaa388e239d44a9f001f811b1e31d9ad7a +0 -0
- llama_cpp_python-0.2.37/.git/objects/0d/870969f4b23bd92a09ec29134d3fb454d38bec +0 -0
- llama_cpp_python-0.2.37/.git/objects/0e/2a907c0e40de1a432ee0cd2e3d01c634df1a37 +0 -0
- llama_cpp_python-0.2.37/.git/objects/11/2a0f7872fa9244bf38729a2722dc5c08dec20c +0 -0
- llama_cpp_python-0.2.37/.git/objects/11/b6d5c26e3f7157658952b8ec353e985d522fac +0 -1
- llama_cpp_python-0.2.37/.git/objects/13/c5b6b0df5f67e80cbe584909b83777901265a1 +0 -0
- llama_cpp_python-0.2.37/.git/objects/19/9bd4ffbf88c68c98b52c05f388dfa92716f6b7 +0 -0
- llama_cpp_python-0.2.37/.git/objects/1a/5152530cfbde487c928b60269a29fa5219f617 +0 -0
- llama_cpp_python-0.2.37/.git/objects/21/68579c83b3b92db0ba1839b37ee186323abbef +0 -0
- llama_cpp_python-0.2.37/.git/objects/23/e37d4d40e5ec0bfd85b5e928834d58e2cf0da6 +0 -0
- llama_cpp_python-0.2.37/.git/objects/24/04228325d88c59b819acaedb20b15635f75df9 +0 -0
- llama_cpp_python-0.2.37/.git/objects/27/2e4767b47397d1529e6a2d01298144d9d746ba +0 -0
- llama_cpp_python-0.2.37/.git/objects/27/a6b1e5042318f85dc99c9f24f4dd6fc472d242 +0 -0
- llama_cpp_python-0.2.37/.git/objects/29/03e0146d304bcacbfabfe71f171a2edc03043e +0 -0
- llama_cpp_python-0.2.37/.git/objects/2a/721872078dd3a83317627fa5a8e3ca6c6cb3f3 +0 -0
- llama_cpp_python-0.2.37/.git/objects/2c/c6fb02dcb3226c1ecf0fbed6453eb3acadeaa2 +0 -0
- llama_cpp_python-0.2.37/.git/objects/36/8022c457f6dd5bb566f7e2a21d88850bd98768 +0 -0
- llama_cpp_python-0.2.37/.git/objects/39/697bfc2538bd4558018d0d721d6d6028c4bb56 +0 -2
- llama_cpp_python-0.2.37/.git/objects/3a/1d7180d508818fe957923e00dcd8950938632d +0 -5
- llama_cpp_python-0.2.37/.git/objects/3a/6457dcdfd47e764654bacae0ba8347976b645a +0 -0
- llama_cpp_python-0.2.37/.git/objects/3d/d00767671c5e9dac5a2ab8f4f1331531294b60 +0 -0
- llama_cpp_python-0.2.37/.git/objects/41/3097201ac0fc27ef4dcf518b699a6fb610a54c +0 -0
- llama_cpp_python-0.2.37/.git/objects/41/cc68ea2402cf682807649d841e7c0f4175db01 +0 -0
- llama_cpp_python-0.2.37/.git/objects/43/5af43a845bd86d9bc7ed55f735a7e100ad566b +0 -0
- llama_cpp_python-0.2.37/.git/objects/47/4503fdfc554d8caabee4f321a80427f8c7d696 +0 -0
- llama_cpp_python-0.2.37/.git/objects/4a/106470b0b650cd76f2f5d00d744b615c72bed3 +0 -0
- llama_cpp_python-0.2.37/.git/objects/4b/3189dd1a54be3bc416786ddf184dd047dc4b20 +0 -0
- llama_cpp_python-0.2.37/.git/objects/4b/c4a6c9741137cb9351feebdd77ae023beae9ec +0 -0
- llama_cpp_python-0.2.37/.git/objects/4c/e899ccd257ed95da4f07179bfff30024aeefb8 +0 -0
- llama_cpp_python-0.2.37/.git/objects/51/f357200f8b998031f4be924e11ed2ae4bf3fea +0 -0
- llama_cpp_python-0.2.37/.git/objects/56/2410fe1a4c4093d62e1705933eaf66c6d99a2c +0 -2
- llama_cpp_python-0.2.37/.git/objects/5f/350ffe99abe7297b2839f36fd0eaeb98887805 +0 -0
- llama_cpp_python-0.2.37/.git/objects/60/bc7aef42aac0409cfdca666ad2ff6f516d7b5b +0 -0
- llama_cpp_python-0.2.37/.git/objects/61/027ef99725c50b0891fdbf0bf263a33abe648f +0 -0
- llama_cpp_python-0.2.37/.git/objects/65/1cd4ccf800ecca4ed5d0dab493b41f97d0d117 +0 -0
- llama_cpp_python-0.2.37/.git/objects/68/7316b327ca038d26a338b3070a7d4698322515 +0 -0
- llama_cpp_python-0.2.37/.git/objects/74/7c6130e3cb1479d20e2013b1dd3db3379c2266 +0 -1
- llama_cpp_python-0.2.37/.git/objects/76/e26fbd106895fba52861f8ac1e11cc6ee2a307 +0 -0
- llama_cpp_python-0.2.37/.git/objects/79/5dad726ae91f330c56e49bc188080b5b3b5ba9 +0 -0
- llama_cpp_python-0.2.37/.git/objects/79/a9e67a1aee09c6d182f240ba5eef32feabcbce +0 -3
- llama_cpp_python-0.2.37/.git/objects/7a/7b899ec7dd48d192cba14ac9c8e9df4233f7fb +0 -0
- llama_cpp_python-0.2.37/.git/objects/7d/5498f9d2c49c1f731b47845d845f6c5e16a3d9 +0 -0
- llama_cpp_python-0.2.37/.git/objects/7d/6c970483161eaf43cfa9d50010c071d4953053 +0 -0
- llama_cpp_python-0.2.37/.git/objects/7e/df0975dc12ccc95ad14de085f07efe6d65c620 +0 -3
- llama_cpp_python-0.2.37/.git/objects/7e/e8f748eab47180cea09c0ad8e75c3b991b4af4 +0 -0
- llama_cpp_python-0.2.37/.git/objects/80/7b0f57a8a873e58ade0ff0f5b0bcf0ff66b7f9 +0 -0
- llama_cpp_python-0.2.37/.git/objects/82/d901c150002ef7bc17c9d77f2c5e3a2fed1240 +0 -0
- llama_cpp_python-0.2.37/.git/objects/82/e5c4487e57d6d59c901a73bdd2a9bc172fee7c +0 -3
- llama_cpp_python-0.2.37/.git/objects/8b/d03f88a1895cbf3ef249e13df79ee0efda779d +0 -2
- llama_cpp_python-0.2.37/.git/objects/91/abb11fdf507883caeeb2d2958e1c65fb6cbdc1 +0 -0
- llama_cpp_python-0.2.37/.git/objects/99/32d6130f5552a9b85c8b15b4ac6bc26b1068ce +0 -0
- llama_cpp_python-0.2.37/.git/objects/9a/fff3d0aa6ccce179543174cc7025807b87de08 +0 -0
- llama_cpp_python-0.2.37/.git/objects/9d/0ec2f705618e591cfa8d6512cb9a96b3da75f1 +0 -0
- llama_cpp_python-0.2.37/.git/objects/9d/3d3559849603efda6f3c8181684e4d19e0ec79 +0 -0
- llama_cpp_python-0.2.37/.git/objects/9e/9870a52245d4f245df5a4e1b89fda121d78214 +0 -0
- llama_cpp_python-0.2.37/.git/objects/9f/e1a7bfd96185d4dee4384ff2508682cd0d9ad6 +0 -0
- llama_cpp_python-0.2.37/.git/objects/a4/5e5d77363eb85144d4aff1a3cbe86ce94d5c92 +0 -0
- llama_cpp_python-0.2.37/.git/objects/a9/e51cdc1672134ec9af66c9eccf09f6da4ceccd +0 -0
- llama_cpp_python-0.2.37/.git/objects/ac/82b8fbb81087ec9b3a72d9e377102a31b28d16 +0 -0
- llama_cpp_python-0.2.37/.git/objects/b1/f90b91594f496ad9f27b1a68584984f4b523d2 +0 -0
- llama_cpp_python-0.2.37/.git/objects/b5/618c10dccf35deb0e69b4df9bc582710f84d07 +0 -0
- llama_cpp_python-0.2.37/.git/objects/b9/1632f5bc787c1404600c894a6a4126359747d8 +0 -0
- llama_cpp_python-0.2.37/.git/objects/bb/b68069d5f8125d64af0baffbd4695f1a0f729c +0 -0
- llama_cpp_python-0.2.37/.git/objects/bb/cbbe7d61558adde3cbfd0c7a63a67c27ed6d30 +0 -0
- llama_cpp_python-0.2.37/.git/objects/bf/9e824922a3fa95b336ad441eca7e42f9b33358 +0 -0
- llama_cpp_python-0.2.37/.git/objects/c1/0aee42e0da547428df7cac9845e246badf1803 +0 -0
- llama_cpp_python-0.2.37/.git/objects/c3/deba87b41117da6624f6d92a016ce50239dfc3 +0 -0
- llama_cpp_python-0.2.37/.git/objects/c8/5e73d2b657bb05ed99309615d67bac93d9f86e +0 -1
- llama_cpp_python-0.2.37/.git/objects/ca/ebbb67fdb02a0a8897d4e4826ea046a9931f6f +0 -0
- llama_cpp_python-0.2.37/.git/objects/cb/221880a66e3c1f2ca15a9df52ac4bcb765e7d4 +0 -0
- llama_cpp_python-0.2.37/.git/objects/cc/6a3a7252ea6e698614f0629d4bc040ab6ca717 +0 -0
- llama_cpp_python-0.2.37/.git/objects/cd/351ba33849dcf6af35b493f7405962fa1625d4 +0 -0
- llama_cpp_python-0.2.37/.git/objects/d1/ae9b564d3ab02c6b91162e52d822d36524edb3 +0 -0
- llama_cpp_python-0.2.37/.git/objects/d3/329eec3bac6ce7e54c76b77ac9bf99fab0fe3f +0 -0
- llama_cpp_python-0.2.37/.git/objects/d3/efb3a6fe1e001db62ec08e5d31ce1d08567045 +0 -0
- llama_cpp_python-0.2.37/.git/objects/d8/ef563c2dc69fe1ea223be7bc5b0efc27ad1f9c +0 -0
- llama_cpp_python-0.2.37/.git/objects/da/c33b74dddf06fcfc01244044eebb102cfcea37 +0 -0
- llama_cpp_python-0.2.37/.git/objects/e1/92a677cd79beca2909e667f7e22044ff1f700b +0 -0
- llama_cpp_python-0.2.37/.git/objects/e2/13518b95011cb6ee783986624c3b6de8659f81 +0 -0
- llama_cpp_python-0.2.37/.git/objects/e4/1f375774e6945e445bfb179502b128fe22dda7 +0 -0
- llama_cpp_python-0.2.37/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
- llama_cpp_python-0.2.37/.git/objects/eb/0fb9662e690d0f9de4632cddd321b3f872a725 +0 -0
- llama_cpp_python-0.2.37/.git/objects/eb/9a2cfa9167df02f136502af79738c71363abfd +0 -0
- llama_cpp_python-0.2.37/.git/objects/ef/1b2c0162e8edd321e2b9c1ce375d96f1f1d048 +0 -0
- llama_cpp_python-0.2.37/.git/objects/f0/827d762e852a21f6406c469300899d5f509b8f +0 -0
- llama_cpp_python-0.2.37/.git/objects/f1/b8e9d154231932c4b7b9b59611626764e68632 +0 -0
- llama_cpp_python-0.2.37/.git/objects/f2/0813765a70679e8a063871c9ef75d75c65ccb6 +0 -0
- llama_cpp_python-0.2.37/.git/objects/fa/dfc5fb4fe6f5eb6d5d98b62519e374a5202b00 +0 -0
- llama_cpp_python-0.2.37/.git/objects/fc/25ff5160028dee3570249abc40cd57780bcca9 +0 -0
- llama_cpp_python-0.2.37/.git/objects/fc/ef8cd800ee8a265b146748d178cb56b5632bf3 +0 -0
- llama_cpp_python-0.2.37/.git/objects/fd/64c09b37947c97e58903ce570785c657d56722 +0 -0
- llama_cpp_python-0.2.37/.git/objects/fe/b0ed68d94eac48b844fd587ddfb808649716a1 +0 -1
- llama_cpp_python-0.2.37/.git/objects/ff/1484c2b966abc1ca2625fdae6a4fb52b8226f0 +0 -0
- llama_cpp_python-0.2.37/.git/objects/ff/3e950cd1110fe552912cea4c268c4023d2b737 +0 -3
- llama_cpp_python-0.2.37/.git/refs/tags/v0.2.37 +0 -1
- llama_cpp_python-0.2.37/.git/shallow +0 -1
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.dockerignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/description +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/info/exclude +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/description +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/info/exclude +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/refs/remotes/origin/HEAD +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.github/dependabot.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.github/workflows/build-and-release.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.github/workflows/build-docker.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.github/workflows/publish-to-test.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.github/workflows/publish.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.github/workflows/test-pypi.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.github/workflows/test.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.gitmodules +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/.readthedocs.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/LICENSE.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/Makefile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docker/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docker/cuda_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docker/open_llama/Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docker/open_llama/build.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docker/open_llama/hug_model.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docker/open_llama/start.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docker/open_llama/start_server.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docker/openblas_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docker/simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docker/simple/run.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docs/api-reference.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docs/changelog.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docs/index.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docs/install/macos.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docs/requirements.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/docs/server.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/high_level_api/high_level_api_embedding.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/high_level_api/high_level_api_inference.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/high_level_api/high_level_api_streaming.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/high_level_api/langchain_custom_llm.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/low_level_api/Chat.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/low_level_api/Miku.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/low_level_api/ReasonAct.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/low_level_api/common.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/low_level_api/low_level_api_chat_cpp.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/low_level_api/low_level_api_llama_cpp.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/low_level_api/quantize.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/low_level_api/readme/low_level_api_llama_cpp.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/low_level_api/util.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/notebooks/Batching.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/notebooks/Clients.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/notebooks/Functions.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/notebooks/Guidance.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/notebooks/Multimodal.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/examples/notebooks/PerformanceTuning.ipynb +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/_internals.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/_utils.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/llama_cache.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/llama_grammar.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/llama_types.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/llava_cpp.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/py.typed +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/server/__init__.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/server/__main__.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/server/app.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/server/cli.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/server/errors.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/llama_cpp/server/types.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/mkdocs.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/pyproject.toml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/tests/test_grammar.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/tests/test_llama.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/tests/test_llama_chat_format.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.clang-tidy +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/cloud-v-pipeline +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/full-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/full-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/main-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/main-intel.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/main-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/apps.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/devshells.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/jetson-support.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/nixpkgs-instances.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/package.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/scope.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/server-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/server-intel.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/server-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/server.Dockerfile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/tools.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.dockerignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.ecrc +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.editorconfig +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.flake8 +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.git +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/enhancement.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/code-coverage.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/docker.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/gguf-publish.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/nix-ci.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/nix-flake-update.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/python-check-requirements.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/python-lint.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/tidy-post.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/tidy-review.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/zig-build.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.gitmodules +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.pre-commit-config.yaml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/LICENSE +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/Makefile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/Package.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/SHA256SUMS +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/awq-py/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/awq-py/awq/apply_awq.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/awq-py/requirements.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/build.zig +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ci/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ci/run.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/cmake/FindSIMD.cmake +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/codecov.yml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/base64.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/build-info.cpp.in +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/console.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/console.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/grammar-parser.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/log.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/sampling.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/sampling.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/stb_image.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/train.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/convert-hf-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/convert-llama-ggml-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/convert-lora-to-ggml.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/convert-persimmon-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/convert.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/docs/BLIS.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/docs/llama-star/idea-arch.key +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/docs/llama-star/idea-arch.pdf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/docs/token_generation_performance_tips.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/Miku.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/alpaca.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/base-translate.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched/batched.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched-bench/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched.swift/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched.swift/Makefile +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched.swift/Package.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched.swift/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched.swift/Sources/main.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/beam-search/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/beam-search/beam-search.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/benchmark/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/chat-13B.bat +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/chat-13B.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/chat-persistent.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/chat-vicuna.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/chat.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/embedding/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/embedding/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/embedding/embedding.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/export-lora/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/export-lora/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/export-lora/export-lora.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/finetune/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/finetune/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/finetune/convert-finetune-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/finetune/finetune.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/finetune/finetune.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/gguf/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/gguf/gguf.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/gpt4all.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/imatrix/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/imatrix/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/imatrix/imatrix.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/infill/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/infill/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/infill/infill.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/jeopardy/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/jeopardy/graph.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/jeopardy/jeopardy.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/jeopardy/qasheet.csv +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/jeopardy/questions.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/json-schema-to-grammar.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama-bench/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/proguard-rules.pro +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/AndroidManifest.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/Downloadable.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/MainActivity.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/MainViewModel.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Color.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Theme.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Type.kt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/drawable/ic_launcher_background.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/drawable/ic_launcher_foreground.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-anydpi/ic_launcher.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-hdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-mdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/values/colors.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/values/strings.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/values/themes.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/xml/backup_rules.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/xml/data_extraction_rules.xml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/build.gradle.kts +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/gradle/wrapper/gradle-wrapper.jar +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/gradle/wrapper/gradle-wrapper.properties +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/gradle.properties +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/gradlew +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/settings.gradle.kts +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/InputButton.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.vim +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama2-13b.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama2.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/android/adb_run.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/android/build_64.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/clip.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/clip.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/convert-image-encoder-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/llava-cli.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/llava-surgery.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/llava.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/llava.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llm.vim +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookahead/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookahead/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookahead/lookahead.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookup/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookup/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookup/lookup.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main/main.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main-cmake-pkg/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main-cmake-pkg/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/make-ggml.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/parallel/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/parallel/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/parallel/parallel.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/passkey/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/passkey/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/passkey/passkey.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/perplexity/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/perplexity/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/perplexity/perplexity.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/pydantic-models-to-grammar-examples.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/pydantic_models_to_grammar.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/quantize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/quantize/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/quantize/quantize.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/quantize-stats/quantize-stats.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/reason-act.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/api_like_OAI.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/chat-llama2.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/chat.mjs +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/completion.js.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/deps.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/httplib.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/index.html.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/index.js.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/json-schema-to-grammar.mjs.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/json.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/oai.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/public/completion.js +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/public/index.html +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/public/index.js +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/public/json-schema-to-grammar.mjs +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/utils.hpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server-llama2-13B.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/simple/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/simple/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/simple/simple.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/speculative/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/speculative/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/speculative/speculative.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/sycl/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/sycl/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/sycl/build.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/sycl/run-llama2.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/tokenize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/tokenize/tokenize.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/train-text-from-scratch/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/flake.lock +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/flake.nix +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-alloc.c +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-alloc.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-backend-impl.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-backend.c +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-backend.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-cuda.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-impl.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-kompute.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-kompute.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-metal.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-mpi.c +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-mpi.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-opencl.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-opencl.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-quants.c +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-quants.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-vulkan.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/LICENSE +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/examples/writer.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/__init__.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/constants.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/gguf_reader.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/gguf_writer.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/py.typed +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/tensor_mapping.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/vocab.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/pyproject.toml +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/scripts/__init__.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/scripts/gguf-convert-endian.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/scripts/gguf-dump.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/scripts/gguf-set-metadata.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/tests/test_gguf.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/README.md +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/arithmetic.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/c.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/chess.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/japanese.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/json.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/json_arr.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/list.gbnf +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/common.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_add.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_addrow.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_cpy_f16_f16.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_cpy_f16_f32.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_cpy_f32_f16.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_cpy_f32_f32.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_diagmask.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_gelu.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_getrows.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_getrows_f16.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_getrows_q4_0.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_getrows_q4_1.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_getrows_q6_k.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_mul.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_mul_mat_f16.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_mul_mat_mat_f32.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_mul_mat_q4_0.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_mul_mat_q4_1.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_mul_mat_q6_k.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_mul_mat_q8_0.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_mul_mv_q_n.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_norm.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_relu.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_rmsnorm.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_rope_f16.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_rope_f32.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_scale.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_scale_8.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_silu.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/op_softmax.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/kompute-shaders/rope_common.comp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/media/llama-leader.jpeg +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/media/llama0-banner.png +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/media/llama0-logo.png +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/media/llama1-banner.png +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/media/llama1-logo.png +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/mypy.ini +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/pocs/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/pocs/vdot/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/pocs/vdot/q8dot.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/pocs/vdot/vdot.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/LLM-questions.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/assistant.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat-with-baichuan.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat-with-qwen.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/dan-modified.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/dan.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/mnemonics.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/parallel-questions.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/reason-act.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements/requirements-convert.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/LlamaConfig.cmake.in +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/build-info.cmake +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/build-info.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/check-requirements.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/ci-run.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/compare-llama-bench.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/convert-gg.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/get-flags.mk +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/get-hellaswag.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/get-pg.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/get-wikitext-2.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/get-winogrande.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/qnt-all.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/run-all-perf.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/run-all-ppl.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/run-with-preset.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/server-llm.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/sync-ggml-am.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/sync-ggml.last +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/sync-ggml.sh +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/verify-checksum-models.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/spm-headers/llama.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/.gitignore +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/get-model.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/get-model.h +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-autorelease.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-c.c +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-double-float.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-grad0.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-llama-grammar.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-model-load-cancel.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-opt.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-quantize-fns.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-quantize-perf.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-rope.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-sampling.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-0-llama.py +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-1-llama.cpp +0 -0
- {llama_cpp_python-0.2.37 → llama_cpp_python-0.2.38}/vendor/llama.cpp/unicode.h +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3322eadbf30a68731f6aafe0b4d055255b46d8f7 '3322eadbf30a68731f6aafe0b4d055255b46d8f7' of https://github.com/abetlen/llama-cpp-python
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3322eadbf30a68731f6aafe0b4d055255b46d8f7
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
[gc]
|
|
10
10
|
auto = 0
|
|
11
11
|
[http "https://github.com/"]
|
|
12
|
-
extraheader = AUTHORIZATION: basic
|
|
12
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX2ptaUFjRVlmS1RIZmJGRE9hdnltMDZJQ0p2MGVoTjFxOGFWNQ==
|
|
13
13
|
[submodule "vendor/llama.cpp"]
|
|
14
14
|
active = true
|
|
15
15
|
url = https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 3322eadbf30a68731f6aafe0b4d055255b46d8f7 runner <runner@fv-az1016-588.pv3vitign2bulj5h5vrau5ekvd.cx.internal.cloudapp.net> 1706732571 +0000 checkout: moving from master to refs/tags/v0.2.38
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
[gc]
|
|
14
14
|
auto = 0
|
|
15
15
|
[http "https://github.com/"]
|
|
16
|
-
extraheader = AUTHORIZATION: basic
|
|
16
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX2ptaUFjRVlmS1RIZmJGRE9hdnltMDZJQ0p2MGVoTjFxOGFWNQ==
|
|
17
17
|
[url "https://github.com/"]
|
|
18
18
|
insteadOf = git@github.com:
|
|
19
19
|
insteadOf = org-6826477@github.com:
|
|
Binary file
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 runner <runner@fv-az1016-588.pv3vitign2bulj5h5vrau5ekvd.cx.internal.cloudapp.net> 1706732572 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
2
|
+
1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 runner <runner@fv-az1016-588.pv3vitign2bulj5h5vrau5ekvd.cx.internal.cloudapp.net> 1706732572 +0000 checkout: moving from master to 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 runner <runner@fv-az1016-588.pv3vitign2bulj5h5vrau5ekvd.cx.internal.cloudapp.net> 1706732572 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 runner <runner@fv-az1016-588.pv3vitign2bulj5h5vrau5ekvd.cx.internal.cloudapp.net> 1706732572 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3322eadbf30a68731f6aafe0b4d055255b46d8f7
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3322eadbf30a68731f6aafe0b4d055255b46d8f7
|
|
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.38]
|
|
11
|
+
|
|
12
|
+
- feat: Update llama.cpp to ggerganov/llama.cpp@1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
13
|
+
- feat: Add speculative decoding by @abetlen in #1120
|
|
14
|
+
- fix: Pass raise_exception and add_generation_prompt to jinja2 chat template 078cca0361bf5a94d2cf52ed04980d20e32d6f95
|
|
15
|
+
|
|
10
16
|
## [0.2.37]
|
|
11
17
|
|
|
12
18
|
- feat: Update llama.cpp to ggerganov/llama.cpp@fea4fd4ba7f6b754ac795387b275e1a014a77bde
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: llama_cpp_python
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.38
|
|
4
4
|
Summary: Python bindings for the llama.cpp library
|
|
5
5
|
Author-Email: Andrei Betlen <abetlen@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -421,6 +421,24 @@ Then you'll need to use a custom chat handler to load the clip model and process
|
|
|
421
421
|
)
|
|
422
422
|
```
|
|
423
423
|
|
|
424
|
+
### Speculative Decoding
|
|
425
|
+
|
|
426
|
+
`llama-cpp-python` supports speculative decoding which allows the model to generate completions based on a draft model.
|
|
427
|
+
|
|
428
|
+
The fastest way to use speculative decoding is through the `LlamaPromptLookupDecoding` class.
|
|
429
|
+
|
|
430
|
+
Just pass this as a draft model to the `Llama` class during initialization.
|
|
431
|
+
|
|
432
|
+
```python
|
|
433
|
+
from llama_cpp import Llama
|
|
434
|
+
from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
|
|
435
|
+
|
|
436
|
+
llama = Llama(
|
|
437
|
+
model_path="path/to/model.gguf",
|
|
438
|
+
draft_model=LlamaPromptLookupDecoding(num_pred_tokens=10) # num_pred_tokens is the number of tokens to predict 10 is the default and generally good for gpu, 2 performs better for cpu-only machines.
|
|
439
|
+
)
|
|
440
|
+
```
|
|
441
|
+
|
|
424
442
|
### Adjusting the Context Window
|
|
425
443
|
|
|
426
444
|
The context window of the Llama models determines the maximum number of tokens that can be processed at once. By default, this is set to 512 tokens, but can be adjusted based on your requirements.
|
|
@@ -378,6 +378,24 @@ Then you'll need to use a custom chat handler to load the clip model and process
|
|
|
378
378
|
)
|
|
379
379
|
```
|
|
380
380
|
|
|
381
|
+
### Speculative Decoding
|
|
382
|
+
|
|
383
|
+
`llama-cpp-python` supports speculative decoding which allows the model to generate completions based on a draft model.
|
|
384
|
+
|
|
385
|
+
The fastest way to use speculative decoding is through the `LlamaPromptLookupDecoding` class.
|
|
386
|
+
|
|
387
|
+
Just pass this as a draft model to the `Llama` class during initialization.
|
|
388
|
+
|
|
389
|
+
```python
|
|
390
|
+
from llama_cpp import Llama
|
|
391
|
+
from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
|
|
392
|
+
|
|
393
|
+
llama = Llama(
|
|
394
|
+
model_path="path/to/model.gguf",
|
|
395
|
+
draft_model=LlamaPromptLookupDecoding(num_pred_tokens=10) # num_pred_tokens is the number of tokens to predict 10 is the default and generally good for gpu, 2 performs better for cpu-only machines.
|
|
396
|
+
)
|
|
397
|
+
```
|
|
398
|
+
|
|
381
399
|
### Adjusting the Context Window
|
|
382
400
|
|
|
383
401
|
The context window of the Llama models determines the maximum number of tokens that can be processed at once. By default, this is set to 512 tokens, but can be adjusted based on your requirements.
|
|
@@ -30,6 +30,8 @@ from .llama_cache import (
|
|
|
30
30
|
import llama_cpp.llama_cpp as llama_cpp
|
|
31
31
|
import llama_cpp.llama_chat_format as llama_chat_format
|
|
32
32
|
|
|
33
|
+
from llama_cpp.llama_speculative import LlamaDraftModel
|
|
34
|
+
|
|
33
35
|
import numpy as np
|
|
34
36
|
import numpy.typing as npt
|
|
35
37
|
|
|
@@ -39,6 +41,8 @@ from ._internals import (
|
|
|
39
41
|
_LlamaContext, # type: ignore
|
|
40
42
|
_LlamaBatch, # type: ignore
|
|
41
43
|
_LlamaTokenDataArray, # type: ignore
|
|
44
|
+
_LlamaSamplingParams, # type: ignore
|
|
45
|
+
_LlamaSamplingContext, # type: ignore
|
|
42
46
|
)
|
|
43
47
|
|
|
44
48
|
|
|
@@ -89,6 +93,8 @@ class Llama:
|
|
|
89
93
|
# Chat Format Params
|
|
90
94
|
chat_format: Optional[str] = None,
|
|
91
95
|
chat_handler: Optional[llama_chat_format.LlamaChatCompletionHandler] = None,
|
|
96
|
+
# Speculative Decoding
|
|
97
|
+
draft_model: Optional[LlamaDraftModel] = None,
|
|
92
98
|
# Misc
|
|
93
99
|
verbose: bool = True,
|
|
94
100
|
# Extra Params
|
|
@@ -152,6 +158,7 @@ class Llama:
|
|
|
152
158
|
numa: Enable NUMA support. (NOTE: The initial value of this parameter is used for the remainder of the program as this value is set in llama_backend_init)
|
|
153
159
|
chat_format: String specifying the chat format to use when calling create_chat_completion.
|
|
154
160
|
chat_handler: Optional chat handler to use when calling create_chat_completion.
|
|
161
|
+
draft_model: Optional draft model to use for speculative decoding.
|
|
155
162
|
verbose: Print verbose output to stderr.
|
|
156
163
|
|
|
157
164
|
Raises:
|
|
@@ -315,6 +322,8 @@ class Llama:
|
|
|
315
322
|
self.chat_format = chat_format
|
|
316
323
|
self.chat_handler = chat_handler
|
|
317
324
|
|
|
325
|
+
self.draft_model = draft_model
|
|
326
|
+
|
|
318
327
|
self._n_vocab = self.n_vocab()
|
|
319
328
|
self._n_ctx = self.n_ctx()
|
|
320
329
|
|
|
@@ -503,6 +512,7 @@ class Llama:
|
|
|
503
512
|
penalize_nl: bool = True,
|
|
504
513
|
logits_processor: Optional[LogitsProcessorList] = None,
|
|
505
514
|
grammar: Optional[LlamaGrammar] = None,
|
|
515
|
+
idx: Optional[int] = None,
|
|
506
516
|
):
|
|
507
517
|
"""Sample a token from the model.
|
|
508
518
|
|
|
@@ -517,77 +527,46 @@ class Llama:
|
|
|
517
527
|
"""
|
|
518
528
|
assert self._ctx is not None
|
|
519
529
|
assert self.n_tokens > 0
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
n_ctx = self._n_ctx
|
|
526
|
-
top_k = n_vocab if top_k <= 0 else top_k
|
|
527
|
-
last_n_tokens_size = n_ctx if last_n_tokens_size < 0 else last_n_tokens_size
|
|
528
|
-
last_n_tokens_data_c = (llama_cpp.llama_token * last_n_tokens_size)(
|
|
529
|
-
*last_n_tokens_data
|
|
530
|
-
)
|
|
531
|
-
logits: npt.NDArray[np.single] = self._scores[-1, :]
|
|
530
|
+
|
|
531
|
+
if idx is None:
|
|
532
|
+
logits: npt.NDArray[np.single] = self._scores[-1, :]
|
|
533
|
+
else:
|
|
534
|
+
logits = self._scores[idx, :]
|
|
532
535
|
|
|
533
536
|
if logits_processor is not None:
|
|
534
|
-
logits[:] =
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
537
|
+
logits[:] = (
|
|
538
|
+
logits_processor(self._input_ids, logits)
|
|
539
|
+
if idx is None
|
|
540
|
+
else logits_processor(self._input_ids[:idx], logits)
|
|
541
|
+
)
|
|
542
|
+
|
|
543
|
+
sampling_params = _LlamaSamplingParams(
|
|
544
|
+
top_k=top_k,
|
|
545
|
+
top_p=top_p,
|
|
546
|
+
min_p=min_p,
|
|
547
|
+
tfs_z=tfs_z,
|
|
548
|
+
typical_p=typical_p,
|
|
549
|
+
temp=temp,
|
|
550
|
+
penalty_last_n=self.last_n_tokens_size,
|
|
542
551
|
penalty_repeat=repeat_penalty,
|
|
543
552
|
penalty_freq=frequency_penalty,
|
|
544
553
|
penalty_present=presence_penalty,
|
|
554
|
+
mirostat=mirostat_mode,
|
|
555
|
+
mirostat_tau=mirostat_tau,
|
|
556
|
+
mirostat_eta=mirostat_eta,
|
|
557
|
+
penalize_nl=penalize_nl,
|
|
558
|
+
)
|
|
559
|
+
sampling_context = _LlamaSamplingContext(
|
|
560
|
+
params=sampling_params,
|
|
561
|
+
grammar=grammar,
|
|
562
|
+
)
|
|
563
|
+
sampling_context.prev = list(self.eval_tokens)
|
|
564
|
+
id = sampling_context.sample(ctx_main=self._ctx, logits_array=logits)
|
|
565
|
+
sampling_context.accept(
|
|
566
|
+
ctx_main=self._ctx,
|
|
567
|
+
id=id,
|
|
568
|
+
apply_grammar=grammar is not None,
|
|
545
569
|
)
|
|
546
|
-
if not penalize_nl:
|
|
547
|
-
self._candidates.candidates.data[self._token_nl].logit = llama_cpp.c_float(
|
|
548
|
-
nl_logit
|
|
549
|
-
)
|
|
550
|
-
|
|
551
|
-
if grammar is not None:
|
|
552
|
-
self._ctx.sample_grammar(
|
|
553
|
-
candidates=self._candidates,
|
|
554
|
-
grammar=grammar,
|
|
555
|
-
)
|
|
556
|
-
|
|
557
|
-
if temp < 0.0:
|
|
558
|
-
self._ctx.sample_softmax(candidates=self._candidates)
|
|
559
|
-
id = self._candidates.candidates.data[0].id
|
|
560
|
-
elif temp == 0.0:
|
|
561
|
-
id = self._ctx.sample_token_greedy(candidates=self._candidates)
|
|
562
|
-
elif mirostat_mode == 1:
|
|
563
|
-
self._ctx.sample_temp(candidates=self._candidates, temp=temp)
|
|
564
|
-
id = self._ctx.sample_token_mirostat(
|
|
565
|
-
candidates=self._candidates,
|
|
566
|
-
tau=mirostat_tau,
|
|
567
|
-
eta=mirostat_eta,
|
|
568
|
-
mu=ctypes.pointer(self._mirostat_mu),
|
|
569
|
-
m=100,
|
|
570
|
-
)
|
|
571
|
-
elif mirostat_mode == 2:
|
|
572
|
-
self._ctx.sample_temp(candidates=self._candidates, temp=temp)
|
|
573
|
-
id = self._ctx.sample_token_mirostat_v2(
|
|
574
|
-
candidates=self._candidates,
|
|
575
|
-
tau=mirostat_tau,
|
|
576
|
-
eta=mirostat_eta,
|
|
577
|
-
mu=ctypes.pointer(self._mirostat_mu),
|
|
578
|
-
)
|
|
579
|
-
else:
|
|
580
|
-
self._ctx.sample_top_k(candidates=self._candidates, k=top_k, min_keep=1)
|
|
581
|
-
self._ctx.sample_tail_free(candidates=self._candidates, z=tfs_z, min_keep=1)
|
|
582
|
-
self._ctx.sample_typical(
|
|
583
|
-
candidates=self._candidates, p=typical_p, min_keep=1
|
|
584
|
-
)
|
|
585
|
-
self._ctx.sample_top_p(candidates=self._candidates, p=top_p, min_keep=1)
|
|
586
|
-
self._ctx.sample_min_p(candidates=self._candidates, p=min_p, min_keep=1)
|
|
587
|
-
self._ctx.sample_temp(candidates=self._candidates, temp=temp)
|
|
588
|
-
id = self._ctx.sample_token(candidates=self._candidates)
|
|
589
|
-
if grammar is not None:
|
|
590
|
-
self._ctx.grammar_accept_token(grammar=grammar, token=id)
|
|
591
570
|
return id
|
|
592
571
|
|
|
593
572
|
def generate(
|
|
@@ -656,34 +635,56 @@ class Llama:
|
|
|
656
635
|
if grammar is not None:
|
|
657
636
|
grammar.reset()
|
|
658
637
|
|
|
638
|
+
sample_idx = self.n_tokens + len(tokens) - 1
|
|
639
|
+
tokens = list(tokens)
|
|
640
|
+
|
|
659
641
|
# Eval and sample
|
|
660
642
|
while True:
|
|
661
643
|
self.eval(tokens)
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
644
|
+
while sample_idx < self.n_tokens:
|
|
645
|
+
token = self.sample(
|
|
646
|
+
top_k=top_k,
|
|
647
|
+
top_p=top_p,
|
|
648
|
+
min_p=min_p,
|
|
649
|
+
typical_p=typical_p,
|
|
650
|
+
temp=temp,
|
|
651
|
+
repeat_penalty=repeat_penalty,
|
|
652
|
+
frequency_penalty=frequency_penalty,
|
|
653
|
+
presence_penalty=presence_penalty,
|
|
654
|
+
tfs_z=tfs_z,
|
|
655
|
+
mirostat_mode=mirostat_mode,
|
|
656
|
+
mirostat_tau=mirostat_tau,
|
|
657
|
+
mirostat_eta=mirostat_eta,
|
|
658
|
+
logits_processor=logits_processor,
|
|
659
|
+
grammar=grammar,
|
|
660
|
+
penalize_nl=penalize_nl,
|
|
661
|
+
idx=sample_idx,
|
|
662
|
+
)
|
|
663
|
+
|
|
664
|
+
sample_idx += 1
|
|
665
|
+
if stopping_criteria is not None and stopping_criteria(
|
|
666
|
+
self._input_ids, self._scores[-1, :]
|
|
667
|
+
):
|
|
668
|
+
return
|
|
669
|
+
tokens_or_none = yield token
|
|
670
|
+
tokens.clear()
|
|
671
|
+
tokens.append(token)
|
|
672
|
+
if tokens_or_none is not None:
|
|
673
|
+
tokens.extend(tokens_or_none)
|
|
674
|
+
|
|
675
|
+
if sample_idx < self.n_tokens and token != self._input_ids[sample_idx]:
|
|
676
|
+
self.n_tokens = sample_idx
|
|
677
|
+
self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
|
|
678
|
+
break
|
|
679
|
+
|
|
680
|
+
if self.draft_model is not None:
|
|
681
|
+
self.input_ids[self.n_tokens : self.n_tokens + len(tokens)] = tokens
|
|
682
|
+
draft_tokens = self.draft_model(self.input_ids[:self.n_tokens + len(tokens)])
|
|
683
|
+
tokens.extend(
|
|
684
|
+
draft_tokens.astype(int)[
|
|
685
|
+
: self._n_ctx - self.n_tokens - len(tokens)
|
|
686
|
+
]
|
|
687
|
+
)
|
|
687
688
|
|
|
688
689
|
def create_embedding(
|
|
689
690
|
self, input: Union[str, List[str]], model: Optional[str] = None
|
|
@@ -185,16 +185,17 @@ class Jinja2ChatFormatter(ChatFormatter):
|
|
|
185
185
|
messages: List[llama_types.ChatCompletionRequestMessage],
|
|
186
186
|
**kwargs: Any,
|
|
187
187
|
) -> ChatFormatterResponse:
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
llama_types.ChatCompletionRequestAssistantMessage(
|
|
192
|
-
role="assistant", content=""
|
|
193
|
-
),
|
|
194
|
-
]
|
|
188
|
+
def raise_exception(message: str):
|
|
189
|
+
raise ValueError(message)
|
|
190
|
+
|
|
195
191
|
prompt = self._environment.render(
|
|
196
|
-
messages=messages,
|
|
192
|
+
messages=messages,
|
|
193
|
+
eos_token=self.eos_token,
|
|
194
|
+
bos_token=self.bos_token,
|
|
195
|
+
raise_exception=raise_exception,
|
|
196
|
+
add_generation_prompt=self.add_generation_prompt
|
|
197
197
|
)
|
|
198
|
+
|
|
198
199
|
return ChatFormatterResponse(prompt=prompt, stop=[self.eos_token])
|
|
199
200
|
|
|
200
201
|
def to_chat_handler(self) -> LlamaChatCompletionHandler:
|
|
@@ -98,7 +98,7 @@ ggml_backend_sched_eval_callback = ctypes.CFUNCTYPE(c_bool, c_void_p, c_bool, c_
|
|
|
98
98
|
# llama.h bindings
|
|
99
99
|
|
|
100
100
|
_lib.llama_max_devices.argtypes = []
|
|
101
|
-
_lib.llama_max_devices.restype = ctypes.
|
|
101
|
+
_lib.llama_max_devices.restype = ctypes.c_size_t
|
|
102
102
|
|
|
103
103
|
LLAMA_MAX_DEVICES = _lib.llama_max_devices()
|
|
104
104
|
|
|
@@ -390,7 +390,7 @@ class llama_model_kv_override(Structure):
|
|
|
390
390
|
# // LLAMA_SPLIT_LAYER: ignored
|
|
391
391
|
# int32_t main_gpu;
|
|
392
392
|
|
|
393
|
-
# // proportion of the model (layers or rows) to offload to each GPU, size:
|
|
393
|
+
# // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
|
|
394
394
|
# const float * tensor_split;
|
|
395
395
|
|
|
396
396
|
# // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
|
|
@@ -417,7 +417,7 @@ class llama_model_params(Structure):
|
|
|
417
417
|
n_gpu_layers (int): number of layers to store in VRAM
|
|
418
418
|
split_mode (int): how to split the model across multiple GPUs
|
|
419
419
|
main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results LLAMA_SPLIT_LAYER: ignored
|
|
420
|
-
tensor_split (ctypes.Array[ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size:
|
|
420
|
+
tensor_split (ctypes.Array[ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
|
|
421
421
|
progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
|
|
422
422
|
progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
|
|
423
423
|
kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
|
|
@@ -760,16 +760,43 @@ _lib.llama_time_us.argtypes = []
|
|
|
760
760
|
_lib.llama_time_us.restype = ctypes.c_int64
|
|
761
761
|
|
|
762
762
|
|
|
763
|
-
# LLAMA_API
|
|
763
|
+
# LLAMA_API size_t llama_max_devices(void);
|
|
764
764
|
def llama_max_devices() -> int:
|
|
765
765
|
return _lib.llama_max_devices()
|
|
766
766
|
|
|
767
767
|
|
|
768
768
|
_lib.llama_max_devices.argtypes = []
|
|
769
|
-
_lib.llama_max_devices.restype = ctypes.
|
|
769
|
+
_lib.llama_max_devices.restype = ctypes.c_size_t
|
|
770
770
|
|
|
771
771
|
|
|
772
|
-
# LLAMA_API bool
|
|
772
|
+
# LLAMA_API bool llama_supports_mmap (void);
|
|
773
|
+
def llama_supports_mmap() -> bool:
|
|
774
|
+
return _lib.llama_supports_mmap()
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
_lib.llama_supports_mmap.argtypes = []
|
|
778
|
+
_lib.llama_supports_mmap.restype = c_bool
|
|
779
|
+
|
|
780
|
+
|
|
781
|
+
# LLAMA_API bool llama_supports_mlock (void);
|
|
782
|
+
def llama_supports_mlock() -> bool:
|
|
783
|
+
return _lib.llama_supports_mlock()
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
_lib.llama_supports_mlock.argtypes = []
|
|
787
|
+
_lib.llama_supports_mlock.restype = c_bool
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
# LLAMA_API bool llama_supports_gpu_offload(void);
|
|
791
|
+
def llama_supports_gpu_offload() -> bool:
|
|
792
|
+
return _lib.llama_supports_gpu_offload()
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
_lib.llama_supports_gpu_offload.argtypes = []
|
|
796
|
+
_lib.llama_supports_gpu_offload.restype = c_bool
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
# LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
|
|
773
800
|
def llama_mmap_supported() -> bool:
|
|
774
801
|
return _lib.llama_mmap_supported()
|
|
775
802
|
|
|
@@ -778,7 +805,7 @@ _lib.llama_mmap_supported.argtypes = []
|
|
|
778
805
|
_lib.llama_mmap_supported.restype = c_bool
|
|
779
806
|
|
|
780
807
|
|
|
781
|
-
# LLAMA_API bool llama_mlock_supported(void);
|
|
808
|
+
# LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
|
|
782
809
|
def llama_mlock_supported() -> bool:
|
|
783
810
|
return _lib.llama_mlock_supported()
|
|
784
811
|
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import numpy.typing as npt
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LlamaDraftModel(abc.ABC):
|
|
10
|
+
@abc.abstractmethod
|
|
11
|
+
def __call__(
|
|
12
|
+
self, input_ids: npt.NDArray[np.intc], /, **kwargs: Any
|
|
13
|
+
) -> npt.NDArray[np.intc]:
|
|
14
|
+
raise NotImplementedError()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class LlamaPromptLookupDecoding(LlamaDraftModel):
|
|
18
|
+
"""Based on https://github.com/apoorvumang/prompt-lookup-decoding"""
|
|
19
|
+
|
|
20
|
+
def __init__(self, max_ngram_size: int = 2, num_pred_tokens: int = 10):
|
|
21
|
+
self.max_ngram_size = max_ngram_size
|
|
22
|
+
self.num_pred_tokens = num_pred_tokens
|
|
23
|
+
|
|
24
|
+
@staticmethod
|
|
25
|
+
def find_candidate_pred_tokens(
|
|
26
|
+
input_ids: npt.NDArray[np.intc],
|
|
27
|
+
max_ngram_size: int,
|
|
28
|
+
num_pred_tokens: int,
|
|
29
|
+
):
|
|
30
|
+
input_length = input_ids.shape[0]
|
|
31
|
+
|
|
32
|
+
for ngram_size in range(min(max_ngram_size, input_length - 1), 0, -1):
|
|
33
|
+
# Create sliding windows of size ngram_size
|
|
34
|
+
windows = np.lib.stride_tricks.sliding_window_view(input_ids, (ngram_size,))
|
|
35
|
+
|
|
36
|
+
# Convert ngram to an array for comparison
|
|
37
|
+
ngram_array = input_ids[-ngram_size:]
|
|
38
|
+
|
|
39
|
+
# Find where the windows match the ngram
|
|
40
|
+
matches = np.all(windows == ngram_array, axis=1)
|
|
41
|
+
|
|
42
|
+
# Get the indices of matches
|
|
43
|
+
match_indices = np.nonzero(matches)[0]
|
|
44
|
+
|
|
45
|
+
# Iterate through match indices to find a valid continuation
|
|
46
|
+
for idx in match_indices:
|
|
47
|
+
start_idx = idx + ngram_size
|
|
48
|
+
end_idx = start_idx + num_pred_tokens
|
|
49
|
+
end_idx = min(end_idx, input_length)
|
|
50
|
+
|
|
51
|
+
if start_idx < end_idx:
|
|
52
|
+
return input_ids[start_idx:end_idx]
|
|
53
|
+
|
|
54
|
+
# If no match is found, return an empty array
|
|
55
|
+
return np.array([], dtype=np.intc)
|
|
56
|
+
|
|
57
|
+
def __call__(
|
|
58
|
+
self, input_ids: npt.NDArray[np.intc], /, **kwargs: Any
|
|
59
|
+
) -> npt.NDArray[np.intc]:
|
|
60
|
+
return self.find_candidate_pred_tokens(
|
|
61
|
+
input_ids=input_ids,
|
|
62
|
+
max_ngram_size=self.max_ngram_size,
|
|
63
|
+
num_pred_tokens=self.num_pred_tokens,
|
|
64
|
+
)
|
|
@@ -5,6 +5,7 @@ import json
|
|
|
5
5
|
from typing import Dict, Optional, Union, List
|
|
6
6
|
|
|
7
7
|
import llama_cpp
|
|
8
|
+
import llama_cpp.llama_speculative as llama_speculative
|
|
8
9
|
|
|
9
10
|
from llama_cpp.server.settings import ModelSettings
|
|
10
11
|
|
|
@@ -92,6 +93,12 @@ class LlamaProxy:
|
|
|
92
93
|
)
|
|
93
94
|
)
|
|
94
95
|
|
|
96
|
+
draft_model = None
|
|
97
|
+
if settings.draft_model is not None:
|
|
98
|
+
draft_model = llama_speculative.LlamaPromptLookupDecoding(
|
|
99
|
+
num_pred_tokens=settings.draft_model_num_pred_tokens
|
|
100
|
+
)
|
|
101
|
+
|
|
95
102
|
kv_overrides: Optional[Dict[str, Union[bool, int, float]]] = None
|
|
96
103
|
if settings.kv_overrides is not None:
|
|
97
104
|
assert isinstance(settings.kv_overrides, list)
|
|
@@ -147,6 +154,8 @@ class LlamaProxy:
|
|
|
147
154
|
# Chat Format Params
|
|
148
155
|
chat_format=settings.chat_format,
|
|
149
156
|
chat_handler=chat_handler,
|
|
157
|
+
# Speculative Decoding
|
|
158
|
+
draft_model=draft_model,
|
|
150
159
|
# Misc
|
|
151
160
|
verbose=settings.verbose,
|
|
152
161
|
)
|
|
@@ -143,6 +143,15 @@ class ModelSettings(BaseSettings):
|
|
|
143
143
|
default=None,
|
|
144
144
|
description="The model name or path to a pretrained HuggingFace tokenizer model. Same as you would pass to AutoTokenizer.from_pretrained().",
|
|
145
145
|
)
|
|
146
|
+
# Speculative Decoding
|
|
147
|
+
draft_model: Optional[str] = Field(
|
|
148
|
+
default=None,
|
|
149
|
+
description="Method to use for speculative decoding. One of (prompt-lookup-decoding).",
|
|
150
|
+
)
|
|
151
|
+
draft_model_num_pred_tokens: int = Field(
|
|
152
|
+
default=10,
|
|
153
|
+
description="Number of tokens to predict using the draft model.",
|
|
154
|
+
)
|
|
146
155
|
# Misc
|
|
147
156
|
verbose: bool = Field(
|
|
148
157
|
default=True, description="Whether to print debug information."
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
|
|
4
|
+
|
|
5
|
+
def test_find_candidate_pred_tokens():
|
|
6
|
+
find_candidate_pred_tokens = LlamaPromptLookupDecoding.find_candidate_pred_tokens
|
|
7
|
+
|
|
8
|
+
# Test Case 1: Matching ngram is found
|
|
9
|
+
input_ids1 = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
|
|
10
|
+
result1 = find_candidate_pred_tokens(input_ids1, max_ngram_size=3, num_pred_tokens=2)
|
|
11
|
+
assert np.array_equal(result1, np.array([1, 2]))
|
|
12
|
+
|
|
13
|
+
# Test Case 2: Matching ngram is not found
|
|
14
|
+
input_ids2 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
|
|
15
|
+
result2 = find_candidate_pred_tokens(input_ids2, max_ngram_size=3, num_pred_tokens=2)
|
|
16
|
+
assert np.array_equal(result2, np.array([]))
|