llama-cpp-python 0.2.25__tar.gz → 0.2.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_cpp_python-0.2.26/.git/FETCH_HEAD +1 -0
- llama_cpp_python-0.2.26/.git/HEAD +1 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/config +1 -1
- llama_cpp_python-0.2.26/.git/index +0 -0
- llama_cpp_python-0.2.26/.git/logs/HEAD +1 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/HEAD +1 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/config +1 -1
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/logs/HEAD +2 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/logs/refs/heads/master +1 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +1 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/objects/pack/pack-0111b9b091eb3bb79082a11785b10794d01ad8e7.idx +0 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/objects/pack/pack-53054ebe4aff53f3127333b245dfe71db566f164.pack → llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/objects/pack/pack-0111b9b091eb3bb79082a11785b10794d01ad8e7.pack +0 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/objects/pack/pack-0111b9b091eb3bb79082a11785b10794d01ad8e7.rev +0 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/packed-refs +2 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/refs/heads/master +1 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/refs/tags/b1708 +1 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/shallow +1 -0
- llama_cpp_python-0.2.26/.git/objects/01/1c3630f5a130505458c29d58f1654d5efba3bf +0 -0
- llama_cpp_python-0.2.26/.git/objects/0d/870969f4b23bd92a09ec29134d3fb454d38bec +0 -0
- llama_cpp_python-0.2.26/.git/objects/22/8e3b99b75ebb836a7e001d2c1316eb7799f3de +0 -0
- llama_cpp_python-0.2.26/.git/objects/2e/ead7f8523a78a1f4d8c776de42d230265bcf37 +0 -0
- llama_cpp_python-0.2.26/.git/objects/46/6e2cf6a1f3a75ef800a56f5fd5e85090a98786 +0 -0
- llama_cpp_python-0.2.26/.git/objects/5d/b46643482a53bdce74c1e51b4ff3944dcf866b +0 -0
- llama_cpp_python-0.2.26/.git/objects/75/800c0d80955f2bc82409ca16eda2b0dc405757 +0 -0
- llama_cpp_python-0.2.26/.git/objects/7f/2e3d8c1e2a505f69b12024b9cc80712f447cdd +0 -0
- llama_cpp_python-0.2.26/.git/objects/91/78a22255fef8acfe298d0f27640f65f647d400 +0 -0
- llama_cpp_python-0.2.26/.git/objects/d3/efb3a6fe1e001db62ec08e5d31ce1d08567045 +0 -0
- llama_cpp_python-0.2.25/.git/objects/5d/f12aaf53a0e85f55e1aa0e5167bc831ab32783 → llama_cpp_python-0.2.26/.git/objects/eb/0fb9662e690d0f9de4632cddd321b3f872a725 +0 -0
- llama_cpp_python-0.2.26/.git/refs/tags/v0.2.26 +1 -0
- llama_cpp_python-0.2.26/.git/shallow +1 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.github/ISSUE_TEMPLATE/bug_report.md +1 -1
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/CHANGELOG.md +4 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/PKG-INFO +1 -1
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/__init__.py +1 -1
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/llama.py +1 -1
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/llama_cpp.py +4 -3
- llama_cpp_python-0.2.26/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md +9 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/docker.yml +1 -1
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/CMakeLists.txt +2 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/Makefile +2 -4
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/README.md +2 -0
- llama_cpp_python-0.2.26/vendor/llama.cpp/awq-py/README.md +116 -0
- llama_cpp_python-0.2.26/vendor/llama.cpp/awq-py/awq/apply_awq.py +254 -0
- llama_cpp_python-0.2.26/vendor/llama.cpp/awq-py/requirements.txt +2 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/build-info.cpp +1 -1
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/sampling.cpp +50 -6
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/sampling.h +3 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/convert-hf-to-gguf.py +109 -4
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/convert.py +20 -4
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/finetune/finetune.cpp +7 -7
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/README.md +2 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/server.cpp +44 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-backend.c +6 -10
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-cuda.cu +499 -369
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-quants.c +23 -342
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml.c +3 -6
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml.h +2 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/constants.py +20 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/tensor_mapping.py +28 -14
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/llama.cpp +222 -11
- llama_cpp_python-0.2.26/vendor/llama.cpp/scripts/sync-ggml-am.sh +131 -0
- llama_cpp_python-0.2.26/vendor/llama.cpp/scripts/sync-ggml.last +1 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-grad0.cpp +0 -3
- llama_cpp_python-0.2.25/.git/FETCH_HEAD +0 -1
- llama_cpp_python-0.2.25/.git/HEAD +0 -1
- llama_cpp_python-0.2.25/.git/index +0 -0
- llama_cpp_python-0.2.25/.git/logs/HEAD +0 -1
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/HEAD +0 -1
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/logs/HEAD +0 -2
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/logs/refs/heads/master +0 -1
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +0 -1
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/objects/pack/pack-53054ebe4aff53f3127333b245dfe71db566f164.idx +0 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/objects/pack/pack-53054ebe4aff53f3127333b245dfe71db566f164.rev +0 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/packed-refs +0 -2
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/refs/heads/master +0 -1
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/refs/tags/b1691 +0 -1
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/shallow +0 -1
- llama_cpp_python-0.2.25/.git/objects/13/454a3a6bea90892a42064c32f7a1a60deb0806 +0 -0
- llama_cpp_python-0.2.25/.git/objects/1c/5efea21fad700ef81acb5682eb71efa64c7453 +0 -0
- llama_cpp_python-0.2.25/.git/objects/2b/14bc6783798c56c71db248c5a834c30fbbce21 +0 -0
- llama_cpp_python-0.2.25/.git/objects/37/556bf9c4f83f51e76682316ff4ea3aed58a279 +0 -0
- llama_cpp_python-0.2.25/.git/objects/64/b567b4f3142efeae284deeab2342122d7e62bd +0 -0
- llama_cpp_python-0.2.25/.git/objects/78/8732bd3ba7ed71b0e49fb2dfe42d4ed781c0eb +0 -0
- llama_cpp_python-0.2.25/.git/objects/7c/1dececc933fdfba00ee95b5bed81f447a21333 +0 -0
- llama_cpp_python-0.2.25/.git/objects/85/21e7721390edb971bb04098cba2d50446b3d8f +0 -0
- llama_cpp_python-0.2.25/.git/objects/90/fdf9b2be7bbfb62c2b1463d6c95cc2d26ebb5c +0 -0
- llama_cpp_python-0.2.25/.git/objects/e9/48f8deb150039c6853e14537a8dd0cc9002b72 +0 -0
- llama_cpp_python-0.2.25/.git/refs/tags/v0.2.25 +0 -1
- llama_cpp_python-0.2.25/.git/shallow +0 -1
- llama_cpp_python-0.2.25/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md +0 -184
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.dockerignore +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/description +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/info/exclude +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/description +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/info/exclude +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/refs/remotes/origin/HEAD +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/03/7f96a2dde3c50a29a26eaacf577f4ecac7c027 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/04/5856ea2ffe697ec33db1c1c989bd45cde5bb3d +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/04/7bc14424303575f73af90611fec827334f54e8 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/0e/2a907c0e40de1a432ee0cd2e3d01c634df1a37 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/0e/f132b07175867c07ad06fa22ca6b95eca67b59 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/11/2a0f7872fa9244bf38729a2722dc5c08dec20c +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/11/b6d5c26e3f7157658952b8ec353e985d522fac +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/13/c5b6b0df5f67e80cbe584909b83777901265a1 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/17/1f357f53b0de535157a2c9b4de04582784ec97 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/19/9bd4ffbf88c68c98b52c05f388dfa92716f6b7 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/23/e37d4d40e5ec0bfd85b5e928834d58e2cf0da6 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/24/04228325d88c59b819acaedb20b15635f75df9 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/27/2e4767b47397d1529e6a2d01298144d9d746ba +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/27/a6b1e5042318f85dc99c9f24f4dd6fc472d242 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/29/03e0146d304bcacbfabfe71f171a2edc03043e +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/2c/c6fb02dcb3226c1ecf0fbed6453eb3acadeaa2 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/2e/24903c21db75253911a70039ac5622672db813 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/34/6b4631ebd1f4af85e9988d4a528e00edba6375 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/39/697bfc2538bd4558018d0d721d6d6028c4bb56 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/3a/1d7180d508818fe957923e00dcd8950938632d +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/3a/6457dcdfd47e764654bacae0ba8347976b645a +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/41/cc68ea2402cf682807649d841e7c0f4175db01 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/44/b6d4a35d3caf0c65aef0a1c77ff0ab4077d405 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/47/4503fdfc554d8caabee4f321a80427f8c7d696 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/4b/3189dd1a54be3bc416786ddf184dd047dc4b20 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/51/f357200f8b998031f4be924e11ed2ae4bf3fea +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/55/d08db5f514fb6847938e3d6489b99f737ba6e3 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/56/2410fe1a4c4093d62e1705933eaf66c6d99a2c +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/5b/51e98ce432974ff031367f8937babe755e3d73 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/5f/350ffe99abe7297b2839f36fd0eaeb98887805 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/60/bc7aef42aac0409cfdca666ad2ff6f516d7b5b +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/61/027ef99725c50b0891fdbf0bf263a33abe648f +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/68/7316b327ca038d26a338b3070a7d4698322515 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/74/7c6130e3cb1479d20e2013b1dd3db3379c2266 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/76/e26fbd106895fba52861f8ac1e11cc6ee2a307 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/79/5dad726ae91f330c56e49bc188080b5b3b5ba9 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/79/a9e67a1aee09c6d182f240ba5eef32feabcbce +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/7a/7b899ec7dd48d192cba14ac9c8e9df4233f7fb +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/7d/5498f9d2c49c1f731b47845d845f6c5e16a3d9 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/7d/6c970483161eaf43cfa9d50010c071d4953053 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/7e/df0975dc12ccc95ad14de085f07efe6d65c620 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/7e/e8f748eab47180cea09c0ad8e75c3b991b4af4 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/80/7b0f57a8a873e58ade0ff0f5b0bcf0ff66b7f9 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/82/e5c4487e57d6d59c901a73bdd2a9bc172fee7c +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/8b/d03f88a1895cbf3ef249e13df79ee0efda779d +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/8e/32d2c0edce725a47b5845463133919cd766a61 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/91/abb11fdf507883caeeb2d2958e1c65fb6cbdc1 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/97/fe6e180b574c24eb4f07ef229981a3ac478bb7 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/99/32d6130f5552a9b85c8b15b4ac6bc26b1068ce +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/9d/0ec2f705618e591cfa8d6512cb9a96b3da75f1 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/9d/3d3559849603efda6f3c8181684e4d19e0ec79 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/a9/e51cdc1672134ec9af66c9eccf09f6da4ceccd +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/ac/82b8fbb81087ec9b3a72d9e377102a31b28d16 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/ae/c023cd61b3bb83bac968fffdd6aa0810af7c3d +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/af/f397f476fb7773d0e89b0e8913c8b1f97ca3e4 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/b1/f90b91594f496ad9f27b1a68584984f4b523d2 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/b5/affaa9d6087f3888dd9eedea209bb214b6e135 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/b9/1632f5bc787c1404600c894a6a4126359747d8 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/b9/373b7ac641e6e9c8d8cc64951139205d91d8bc +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/bb/cbbe7d61558adde3cbfd0c7a63a67c27ed6d30 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/c5/4e4eb5ce2636abd78df46a7616cfe9196a1198 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/c8/4fd04498c2fb188ff7c2a59473035fc90eb990 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/c8/5e73d2b657bb05ed99309615d67bac93d9f86e +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/ca/e7ebb7a833dafcd402a96bea3a9574f74f0ed5 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/ca/ebbb67fdb02a0a8897d4e4826ea046a9931f6f +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/cc/6a3a7252ea6e698614f0629d4bc040ab6ca717 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/cd/351ba33849dcf6af35b493f7405962fa1625d4 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/d3/329eec3bac6ce7e54c76b77ac9bf99fab0fe3f +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/da/c33b74dddf06fcfc01244044eebb102cfcea37 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/e2/13518b95011cb6ee783986624c3b6de8659f81 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/e4/1f375774e6945e445bfb179502b128fe22dda7 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/e9/30609ff5c479a1d5e12a8f3993582d421a6326 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/eb/9a2cfa9167df02f136502af79738c71363abfd +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/ef/1b2c0162e8edd321e2b9c1ce375d96f1f1d048 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/f0/827d762e852a21f6406c469300899d5f509b8f +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/f1/b8e9d154231932c4b7b9b59611626764e68632 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/f2/0813765a70679e8a063871c9ef75d75c65ccb6 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/fa/dfc5fb4fe6f5eb6d5d98b62519e374a5202b00 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/fc/25ff5160028dee3570249abc40cd57780bcca9 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/fc/ef8cd800ee8a265b146748d178cb56b5632bf3 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/fd/64c09b37947c97e58903ce570785c657d56722 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/fe/b0ed68d94eac48b844fd587ddfb808649716a1 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.git/objects/ff/3e950cd1110fe552912cea4c268c4023d2b737 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.github/dependabot.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.github/workflows/build-and-release.yaml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.github/workflows/build-docker.yaml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.github/workflows/publish-to-test.yaml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.github/workflows/publish.yaml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.github/workflows/test-pypi.yaml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.github/workflows/test.yaml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.gitignore +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.gitmodules +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/.readthedocs.yaml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/LICENSE.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/Makefile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docker/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docker/cuda_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docker/open_llama/Dockerfile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docker/open_llama/build.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docker/open_llama/hug_model.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docker/open_llama/start.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docker/open_llama/start_server.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docker/openblas_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docker/simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docker/simple/run.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docs/api-reference.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docs/changelog.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docs/index.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docs/install/macos.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docs/requirements.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/docs/server.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/high_level_api/fastapi_server.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/high_level_api/high_level_api_embedding.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/high_level_api/high_level_api_inference.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/high_level_api/high_level_api_streaming.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/high_level_api/langchain_custom_llm.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/low_level_api/Chat.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/low_level_api/Miku.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/low_level_api/ReasonAct.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/low_level_api/common.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/low_level_api/low_level_api_chat_cpp.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/low_level_api/low_level_api_llama_cpp.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/low_level_api/quantize.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/low_level_api/readme/low_level_api_llama_cpp.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/low_level_api/util.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/notebooks/Batching.ipynb +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/notebooks/Clients.ipynb +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/notebooks/Functions.ipynb +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/notebooks/Guidance.ipynb +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/notebooks/Multimodal.ipynb +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/examples/notebooks/PerformanceTuning.ipynb +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/_utils.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/llama_chat_format.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/llama_grammar.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/llama_types.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/llava_cpp.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/py.typed +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/server/__init__.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/server/__main__.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/server/app.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/server/cli.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/server/errors.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/server/model.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/server/settings.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/llama_cpp/server/types.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/mkdocs.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/pyproject.toml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/tests/test_grammar.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/tests/test_llama.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.clang-tidy +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/cloud-v-pipeline +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/full-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/full-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/main-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/main-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/tools.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.dockerignore +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.ecrc +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.editorconfig +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.flake8 +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.git +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/enhancement.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/build.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/code-coverage.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/editorconfig.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/gguf-publish.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/python-lint.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/tidy-post.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/tidy-review.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/zig-build.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.gitignore +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.pre-commit-config.yaml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/LICENSE +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/Package.swift +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/SHA256SUMS +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/build.zig +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ci/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ci/run.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/cmake/FindSIMD.cmake +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/codecov.yml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/base64.hpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/build-info.cpp.in +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/common.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/common.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/console.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/console.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/grammar-parser.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/log.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/stb_image.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/train.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/train.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/convert-llama-ggml-to-gguf.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/convert-lora-to-ggml.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/convert-persimmon-to-gguf.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/docs/BLIS.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/docs/llama-star/idea-arch.key +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/docs/llama-star/idea-arch.pdf +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/docs/token_generation_performance_tips.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/Miku.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/alpaca.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched/batched.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched-bench/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched-bench/batched-bench.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched.swift/.gitignore +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched.swift/Makefile +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched.swift/Package.swift +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched.swift/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched.swift/Sources/main.swift +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/beam-search/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/beam-search/beam-search.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/benchmark/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/chat-13B.bat +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/chat-13B.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/chat-persistent.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/chat-vicuna.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/chat.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/embedding/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/embedding/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/embedding/embedding.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/export-lora/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/export-lora/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/export-lora/export-lora.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/finetune/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/finetune/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/finetune/convert-finetune-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/finetune/finetune.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/gguf/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/gguf/gguf.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/gpt4all.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/infill/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/infill/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/infill/infill.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/jeopardy/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/jeopardy/graph.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/jeopardy/jeopardy.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/jeopardy/qasheet.csv +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/jeopardy/questions.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/json-schema-to-grammar.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama-bench/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama-bench/llama-bench.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/.gitignore +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/bridging-header.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AccentColor.colorset/Contents.json +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Preview Content/Preview Assets.xcassets/Contents.json +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.vim +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama2-13b.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama2.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/clip.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/clip.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/convert-image-encoder-to-gguf.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/llava-cli.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/llava-surgery.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/llava.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/llava.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llm.vim +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/lookahead/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/lookahead/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/lookahead/lookahead.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/lookup/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/lookup/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/lookup/lookup.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main/main.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main-cmake-pkg/.gitignore +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main-cmake-pkg/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/make-ggml.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/metal/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/metal/metal.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/parallel/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/parallel/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/parallel/parallel.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/perplexity/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/perplexity/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/perplexity/perplexity.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/quantize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/quantize/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/quantize/quantize.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/quantize-stats/quantize-stats.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/reason-act.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/api_like_OAI.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/chat-llama2.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/chat.mjs +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/chat.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/completion.js.hpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/deps.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/httplib.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/index.html.hpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/index.js.hpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/json-schema-to-grammar.mjs.hpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/json.hpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/public/completion.js +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/public/index.html +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/public/index.js +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/public/json-schema-to-grammar.mjs +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server-llama2-13B.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/simple/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/simple/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/simple/simple.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/speculative/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/speculative/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/speculative/speculative.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/tokenize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/tokenize/tokenize.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/train-text-from-scratch/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/flake.lock +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/flake.nix +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-alloc.c +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-alloc.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-backend-impl.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-backend.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-cuda.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-impl.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-metal.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-metal.m +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-metal.metal +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-mpi.c +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-mpi.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-opencl.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-opencl.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-quants.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/LICENSE +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/examples/writer.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/__init__.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/gguf.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/gguf_reader.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/gguf_writer.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/py.typed +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/vocab.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/pyproject.toml +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/scripts/__init__.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/scripts/gguf-convert-endian.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/scripts/gguf-dump.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/scripts/gguf-set-metadata.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/tests/test_gguf.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/README.md +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/arithmetic.gbnf +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/c.gbnf +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/chess.gbnf +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/japanese.gbnf +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/json.gbnf +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/json_arr.gbnf +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/list.gbnf +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/llama.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/media/llama-leader.jpeg +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/media/llama0-banner.png +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/media/llama0-logo.png +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/media/llama1-banner.png +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/media/llama1-logo.png +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/mypy.ini +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/pocs/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/pocs/vdot/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/pocs/vdot/q8dot.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/pocs/vdot/vdot.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/LLM-questions.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/assistant.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat-with-baichuan.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat-with-qwen.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/dan-modified.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/dan.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/mnemonics.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/parallel-questions.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/reason-act.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/requirements-hf-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/requirements.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/run_with_preset.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/LlamaConfig.cmake.in +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/build-info.cmake +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/build-info.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/convert-gg.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/get-flags.mk +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/get-wikitext-2.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/qnt-all.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/run-all-perf.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/run-all-ppl.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/server-llm.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/sync-ggml.sh +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/verify-checksum-models.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/spm-headers/ggml.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/spm-headers/llama.h +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-backend-ops.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-c.c +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-double-float.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-llama-grammar.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-opt.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-quantize-fns.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-quantize-perf.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-rope.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-sampling.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-0-llama.py +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-1-llama.cpp +0 -0
- {llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/unicode.h +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
011c3630f5a130505458c29d58f1654d5efba3bf '011c3630f5a130505458c29d58f1654d5efba3bf' of https://github.com/abetlen/llama-cpp-python
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
011c3630f5a130505458c29d58f1654d5efba3bf
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
[gc]
|
|
10
10
|
auto = 0
|
|
11
11
|
[http "https://github.com/"]
|
|
12
|
-
extraheader = AUTHORIZATION: basic
|
|
12
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX1U4WUFIMlNCR2xPQ1NjYm1HcTR5ZlZod0ZKeEFUQjJxSmllTw==
|
|
13
13
|
[submodule "vendor/llama.cpp"]
|
|
14
14
|
active = true
|
|
15
15
|
url = https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 011c3630f5a130505458c29d58f1654d5efba3bf runner <runner@fv-az532-920.yhfsaq54z0vebhuvdla3z0z0vh.cx.internal.cloudapp.net> 1703716575 +0000 checkout: moving from master to refs/tags/v0.2.26
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
[gc]
|
|
14
14
|
auto = 0
|
|
15
15
|
[http "https://github.com/"]
|
|
16
|
-
extraheader = AUTHORIZATION: basic
|
|
16
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX1U4WUFIMlNCR2xPQ1NjYm1HcTR5ZlZod0ZKeEFUQjJxSmllTw==
|
|
17
17
|
[url "https://github.com/"]
|
|
18
18
|
insteadOf = git@github.com:
|
|
19
19
|
insteadOf = org-6826477@github.com:
|
|
Binary file
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 f6793491b5af6da75edad34d6f503ef86d31b09f runner <runner@fv-az532-920.yhfsaq54z0vebhuvdla3z0z0vh.cx.internal.cloudapp.net> 1703716576 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
2
|
+
f6793491b5af6da75edad34d6f503ef86d31b09f f6793491b5af6da75edad34d6f503ef86d31b09f runner <runner@fv-az532-920.yhfsaq54z0vebhuvdla3z0z0vh.cx.internal.cloudapp.net> 1703716576 +0000 checkout: moving from master to f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 f6793491b5af6da75edad34d6f503ef86d31b09f runner <runner@fv-az532-920.yhfsaq54z0vebhuvdla3z0z0vh.cx.internal.cloudapp.net> 1703716576 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 f6793491b5af6da75edad34d6f503ef86d31b09f runner <runner@fv-az532-920.yhfsaq54z0vebhuvdla3z0z0vh.cx.internal.cloudapp.net> 1703716576 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
011c3630f5a130505458c29d58f1654d5efba3bf
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
011c3630f5a130505458c29d58f1654d5efba3bf
|
|
@@ -64,7 +64,7 @@ Try the following:
|
|
|
64
64
|
1. `git clone https://github.com/abetlen/llama-cpp-python`
|
|
65
65
|
2. `cd llama-cpp-python`
|
|
66
66
|
3. `rm -rf _skbuild/` # delete any old builds
|
|
67
|
-
4. `python
|
|
67
|
+
4. `python -m pip install .`
|
|
68
68
|
5. `cd ./vendor/llama.cpp`
|
|
69
69
|
6. Follow [llama.cpp's instructions](https://github.com/ggerganov/llama.cpp#build) to `cmake` llama.cpp
|
|
70
70
|
7. Run llama.cpp's `./main` with the same arguments you previously passed to llama-cpp-python and see if you can reproduce the issue. If you can, [log an issue with llama.cpp](https://github.com/ggerganov/llama.cpp/issues)
|
|
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.26]
|
|
11
|
+
|
|
12
|
+
- feat: Update llama.cpp to ggerganov/llama.cpp@f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
13
|
+
|
|
10
14
|
## [0.2.25]
|
|
11
15
|
|
|
12
16
|
- feat(server): Multi model support by @D4ve-R in #931
|
|
@@ -850,7 +850,7 @@ class Llama:
|
|
|
850
850
|
) # 0x7FFFFFFF is INT32 max, will be auto set to all layers
|
|
851
851
|
self.model_params.main_gpu = main_gpu
|
|
852
852
|
self.tensor_split = tensor_split
|
|
853
|
-
self.
|
|
853
|
+
self._c_tensor_split = None
|
|
854
854
|
if self.tensor_split is not None:
|
|
855
855
|
if len(self.tensor_split) > llama_cpp.LLAMA_MAX_DEVICES:
|
|
856
856
|
raise ValueError(
|
|
@@ -62,6 +62,9 @@ def _load_shared_library(lib_base_name: str):
|
|
|
62
62
|
if "CUDA_PATH" in os.environ:
|
|
63
63
|
os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin"))
|
|
64
64
|
os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "lib"))
|
|
65
|
+
if "HIP_PATH" in os.environ:
|
|
66
|
+
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin"))
|
|
67
|
+
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib"))
|
|
65
68
|
cdll_args["winmode"] = ctypes.RTLD_GLOBAL
|
|
66
69
|
|
|
67
70
|
# Try to load the shared library, handling potential errors
|
|
@@ -90,9 +93,7 @@ c_size_t_p = POINTER(c_size_t)
|
|
|
90
93
|
|
|
91
94
|
# llama.h bindings
|
|
92
95
|
|
|
93
|
-
|
|
94
|
-
GGML_CUDA_MAX_DEVICES = 16
|
|
95
|
-
LLAMA_MAX_DEVICES = GGML_CUDA_MAX_DEVICES if GGML_USE_CUBLAS else 1
|
|
96
|
+
LLAMA_MAX_DEVICES = _lib.llama_max_devices()
|
|
96
97
|
|
|
97
98
|
# define LLAMA_DEFAULT_SEED 0xFFFFFFFF
|
|
98
99
|
LLAMA_DEFAULT_SEED = 0xFFFFFFFF
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug template
|
|
3
|
+
about: Used to report bugs in llama.cpp
|
|
4
|
+
labels: ["bug-unconfirmed"]
|
|
5
|
+
assignees: ''
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Please include information about your system, the steps to reproduce the bug, and the version of llama.cpp that you are using. If possible, please provide a minimal code example that reproduces the bug.
|
{llama_cpp_python-0.2.25 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/docker.yml
RENAMED
|
@@ -98,5 +98,5 @@ jobs:
|
|
|
98
98
|
context: .
|
|
99
99
|
push: ${{ github.event_name == 'push' }}
|
|
100
100
|
platforms: ${{ matrix.config.platforms }}
|
|
101
|
-
tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}
|
|
101
|
+
tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
|
|
102
102
|
file: ${{ matrix.config.dockerfile }}
|
|
@@ -302,6 +302,8 @@ if (LLAMA_CUBLAS)
|
|
|
302
302
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
|
|
303
303
|
endif()
|
|
304
304
|
|
|
305
|
+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver)
|
|
306
|
+
|
|
305
307
|
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
|
|
306
308
|
# 52 == lowest CUDA 12 standard
|
|
307
309
|
# 60 == f16 CUDA intrinsics
|
|
@@ -367,17 +367,15 @@ endif # LLAMA_BLIS
|
|
|
367
367
|
|
|
368
368
|
ifdef LLAMA_CUBLAS
|
|
369
369
|
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
|
|
370
|
-
MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib
|
|
370
|
+
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
|
|
371
371
|
OBJS += ggml-cuda.o
|
|
372
372
|
MK_NVCCFLAGS = -use_fast_math
|
|
373
373
|
ifndef JETSON_EOL_MODULE_DETECT
|
|
374
374
|
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
|
|
375
375
|
endif # JETSON_EOL_MODULE_DETECT
|
|
376
|
-
|
|
377
376
|
ifdef LLAMA_DEBUG
|
|
378
377
|
MK_NVCCFLAGS += -lineinfo
|
|
379
|
-
endif
|
|
380
|
-
|
|
378
|
+
endif # LLAMA_DEBUG
|
|
381
379
|
ifdef LLAMA_CUDA_NVCC
|
|
382
380
|
NVCC = $(LLAMA_CUDA_NVCC)
|
|
383
381
|
else
|
|
@@ -102,6 +102,7 @@ as the main playground for developing new features for the [ggml](https://github
|
|
|
102
102
|
- [x] [Deepseek models](https://huggingface.co/models?search=deepseek-ai/deepseek)
|
|
103
103
|
- [x] [Qwen models](https://huggingface.co/models?search=Qwen/Qwen)
|
|
104
104
|
- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
|
|
105
|
+
- [x] [PLaMo-13B](https://github.com/ggerganov/llama.cpp/pull/3557)
|
|
105
106
|
|
|
106
107
|
**Multimodal models:**
|
|
107
108
|
|
|
@@ -132,6 +133,7 @@ as the main playground for developing new features for the [ggml](https://github
|
|
|
132
133
|
- [withcatai/catai](https://github.com/withcatai/catai)
|
|
133
134
|
- [semperai/amica](https://github.com/semperai/amica)
|
|
134
135
|
- [psugihara/FreeChat](https://github.com/psugihara/FreeChat)
|
|
136
|
+
- [ptsochantaris/emeltal](https://github.com/ptsochantaris/emeltal)
|
|
135
137
|
|
|
136
138
|
---
|
|
137
139
|
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# AWQ: Activation-aware Weight Quantization for LLMs - version applied to llama.cpp
|
|
2
|
+
[[Paper](https://arxiv.org/abs/2306.00978)][[Original Repo](https://github.com/mit-han-lab/llm-awq)][[Easy-to-use Repo](https://github.com/casper-hansen/AutoAWQ)]
|
|
3
|
+
|
|
4
|
+
**Supported models:**
|
|
5
|
+
|
|
6
|
+
- [X] LLaMA
|
|
7
|
+
- [x] LLaMA 2
|
|
8
|
+
- [X] MPT
|
|
9
|
+
- [X] Mistral AI v0.1
|
|
10
|
+
- [ ] Bloom
|
|
11
|
+
- [ ] Mixtral MoE
|
|
12
|
+
|
|
13
|
+
**TODO:**
|
|
14
|
+
- [x] Update version to work with both MPT and MPT-AWQ models
|
|
15
|
+
- [ ] Add OPT model
|
|
16
|
+
- [ ] Add Bloom model
|
|
17
|
+
- [ ] Add Mixtral MoE
|
|
18
|
+
- [ ] Support w3, w2
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
## Contents
|
|
22
|
+
|
|
23
|
+
- [Install](#install)
|
|
24
|
+
- [Convert](#convert)
|
|
25
|
+
- [Quantize](#quantize)
|
|
26
|
+
- [Test](#test)
|
|
27
|
+
- [Benchmark](#benchmark)
|
|
28
|
+
- [Results](#results)
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
Install requirements
|
|
32
|
+
```bash
|
|
33
|
+
pip install -r requirements.txt
|
|
34
|
+
```
|
|
35
|
+
Get the pre-computed AWQ search results for multiple model families, including LLaMA, LLaMA2, MPT, OPT
|
|
36
|
+
```bash
|
|
37
|
+
git clone https://huggingface.co/datasets/mit-han-lab/awq-model-zoo awq_cache
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Convert
|
|
41
|
+
Example for llama model
|
|
42
|
+
```bash
|
|
43
|
+
# For llama7b and llama2 models
|
|
44
|
+
python convert.py models/llama-7b/ --awq-path awq_cache/llama-7b-w4-g128.pt --outfile models/llama_7b_fp16.gguf
|
|
45
|
+
# For mistral and mpt models
|
|
46
|
+
python convert-hf-to-gguf.py models/mpt-7b/ --awq-path awq_cache/llama-7b-w4-g128.pt --outfile models/mpt_7b_fp16.gguf
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quantize
|
|
50
|
+
```bash
|
|
51
|
+
# We only benchmark and confirm the results on q4_0, q4_1, and q2_k types.
|
|
52
|
+
./quantize models/llama_7b_fp16.gguf models/llama_7b_q4_0.gguf q4_0
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Test
|
|
56
|
+
```bash
|
|
57
|
+
# For all models.
|
|
58
|
+
./build/bin/main -m models/llama_7b_q4_0.gguf -n 128 --prompt "Once upon a time"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Benchmark
|
|
62
|
+
The perplexity measurements in the tables below are done against the `wikitext2` test dataset (https://paperswithcode.com/dataset/wikitext-2), with a context length of 512.
|
|
63
|
+
```bash
|
|
64
|
+
# For llama and llama2, and mistral models.
|
|
65
|
+
./perplexity -m models/llama_7b_q4_0.gguf -f datasets/wikitext-2-raw/wiki.test.raw
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Results
|
|
69
|
+
Results are run on OpenBLAS (CPU) and CuBLAS (GPU) for fair comparison
|
|
70
|
+
We use three types of llamacpp quantization methods to work with our version, including q4_0, q4_1, and q2_k
|
|
71
|
+
|
|
72
|
+
### Llama 7B (Build with OpenBLAS)
|
|
73
|
+
|
|
74
|
+
| Model | Measure | F16 | Q4_0 | Q4_1 | Q2_K |
|
|
75
|
+
|-----------:|--------------|-------:|-------:|-------:|-------:|
|
|
76
|
+
|Llama 7B | perplexity | 5.9066 | 6.1214 | 6.0643 | 6.5808 |
|
|
77
|
+
|Llama 7B | file size | 12.9G | 3.5G | 3.9G | 2.7G |
|
|
78
|
+
|Llama 7B | bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
|
79
|
+
|AWQ-LLama 7B| perplexity | 5.9175 | 6.0252 | 5.9987 | 6.3692 |
|
|
80
|
+
|AWQ-LLama 7B| file size | 12.9G | 3.5G | 3.9G | 2.7G |
|
|
81
|
+
|AWQ-LLama 7B| bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
### Llama2 7B (Build with CuBLAS)
|
|
85
|
+
|
|
86
|
+
| Model | Measure | F16 | Q4_0 | Q4_1 | Q2_K |
|
|
87
|
+
|------------:|--------------|-------:|-------:|-------:|-------:|
|
|
88
|
+
|Llama2 7B | perplexity | 5.8664 | 6.0260 | 6.0656 | 6.4496 |
|
|
89
|
+
|Llama2 7B | file size | 12.9G | 3.5G | 3.9G | 2.7G |
|
|
90
|
+
|Llama2 7B | bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
|
91
|
+
|AWQ-LLama2 7B| perplexity | 5.8801 | 6.0054 | 5.9849 | 6.3650 |
|
|
92
|
+
|AWQ-LLama2 7B| file size | 12.9G | 3.5G | 3.9G | 2.7G |
|
|
93
|
+
|AWQ-LLama2 7B| bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
### Mistral 7B v0.1 (Build with CuBLAS)
|
|
97
|
+
|
|
98
|
+
| Model | Measure | F16 | Q4_0 | Q4_1 | Q2_K |
|
|
99
|
+
|-------------:|--------------|-------:|-------:|-------:|-------:|
|
|
100
|
+
|Mistral 7B | perplexity | 5.6931 | 5.8202 | 5.8268 | 6.1645 |
|
|
101
|
+
|Mistral 7B | file size | 14.5G | 4.1G | 4.5G | 3.1G |
|
|
102
|
+
|Mistral 7B | bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
|
103
|
+
|AWQ-Mistral 7B| perplexity | 5.6934 | 5.8020 | 5.7691 | 6.0426 |
|
|
104
|
+
|AWQ-Mistral 7B| file size | 14.5G | 4.1G | 4.5G | 3.1G |
|
|
105
|
+
|AWQ-Mistral 7B| bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
|
106
|
+
|
|
107
|
+
### MPT 7B (Build with OpenBLAS)
|
|
108
|
+
|
|
109
|
+
| Model | Measure | F16 | Q4_0 | Q4_1 | Q2_K |
|
|
110
|
+
|---------:|--------------|-------:|-------:|-------:|--------:|
|
|
111
|
+
|MPT 7B | perplexity | 8.4369 | 8.7956 | 8.6265 | 11.4913 |
|
|
112
|
+
|MPT 7B | file size | 13.7G | 3.9G | 4.3G | 2.8G |
|
|
113
|
+
|MPT 7B | bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
|
114
|
+
|AWQ-MPT 7B| perplexity | 8.4944 | 8.7053 | 8.6750 | 10.2873|
|
|
115
|
+
|AWQ-MPT 7B| file size | 13.7G | 3.9G | 4.3G | 2.8G |
|
|
116
|
+
|AWQ-MPT 7B| bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Implements the AWQ for llama.cpp use cases.
|
|
3
|
+
Original paper: https://arxiv.org/abs/2306.00978
|
|
4
|
+
|
|
5
|
+
This code is based on versions of the AWQ implementation found in the following repositories:
|
|
6
|
+
* https://github.com/mit-han-lab/llm-awq
|
|
7
|
+
* https://github.com/casper-hansen/AutoAWQ
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import torch
|
|
12
|
+
import torch.nn as nn
|
|
13
|
+
|
|
14
|
+
from transformers import AutoModelForCausalLM, AutoConfig
|
|
15
|
+
from transformers.models.bloom.modeling_bloom import BloomGelu
|
|
16
|
+
from transformers.models.llama.modeling_llama import LlamaRMSNorm
|
|
17
|
+
from transformers.activations import GELUActivation
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ScaledActivation(nn.Module):
    """
    Wrap an activation module and divide its output by per-feature scales.

    Args:
        module (nn.Module): The activation whose output is rescaled.
        scales (torch.Tensor): Scale factors; they are viewed as (1, 1, -1)
            so they broadcast against the activation output.

    Returns:
        torch.Tensor: The activation output divided by the scales.
    """

    def __init__(self, module, scales):
        super().__init__()
        self.act = module
        # Stored as an nn.Parameter built from the raw tensor data.
        self.scales = nn.Parameter(scales.data)

    def forward(self, x):
        """Apply the wrapped activation, then divide by the broadcast scales."""
        activated = self.act(x)
        # Move the scales to the input's device before dividing.
        divisor = self.scales.view(1, 1, -1).to(x.device)
        return activated / divisor
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def set_op_by_name(layer, name, new_module):
    """
    Replace the submodule addressed by a dotted path with ``new_module``.

    Args:
        layer (nn.Module): The root object whose descendant is replaced.
        name (str): Dot-separated path to the target. Purely numeric
            components are used as integer indices; all other components
            are used as attribute names.
        new_module (nn.Module): The object stored under the final path
            component.
    """
    *parent_path, leaf = name.split(".")

    # Walk down to the parent of the target; with a single-component name
    # the parent is ``layer`` itself.
    parent = layer
    for part in parent_path:
        parent = parent[int(part)] if part.isdigit() else getattr(parent, part)

    setattr(parent, leaf, new_module)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_op_by_name(module, op_name):
    """
    Look up a submodule by the name reported by ``module.named_modules()``.

    Args:
        module (nn.Module): The module whose submodules are searched.
        op_name (str): The exact name of the submodule to return.

    Returns:
        nn.Module: The first submodule whose name equals ``op_name``.

    Raises:
        ValueError: If no submodule carries that name.
    """
    # A private sentinel distinguishes "no match" from any real submodule.
    not_found = object()
    found = next(
        (sub for sub_name, sub in module.named_modules() if sub_name == op_name),
        not_found,
    )
    if found is not_found:
        raise ValueError(f"Cannot find op {op_name} in module {module}")
    return found
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@torch.no_grad()
def scale_ln_fcs(ln, fcs, scales):
    """
    Divide a normalization layer by ``scales`` and multiply the following
    fully-connected layers by the same ``scales``, all in place.

    Args:
        ln (nn.LayerNorm): The normalization module; its weight (and bias,
            when present) is divided by ``scales``.
        fcs (List[nn.Linear]): The fully-connected layers whose weights are
            multiplied by ``scales.view(1, -1)``. A single non-list value is
            wrapped into a one-element list.
        scales (torch.Tensor): A 1D tensor of size (num_features,).
    """
    fc_layers = fcs if isinstance(fcs, list) else [fcs]
    scales = scales.to(ln.weight.device)

    ln.weight.div_(scales)
    ln_bias = getattr(ln, "bias", None)
    if ln_bias is not None:
        ln_bias.div_(scales)

    column_scales = scales.view(1, -1)
    for fc in fc_layers:
        fc.weight.mul_(column_scales)

    # Sanity check: the in-place scaling must not have produced NaNs.
    for mod in [ln, *fc_layers]:
        for p in mod.parameters():
            assert torch.isnan(p).sum() == 0
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@torch.no_grad()
def scale_fc_fc(fc1, fc2, scales):
    """
    Divide the trailing outputs of ``fc1`` by ``scales`` and multiply the
    inputs of ``fc2`` by the same ``scales``, all in place.

    Only the last ``scales.size(0)`` output rows of ``fc1.weight`` are
    scaled, and the matching trailing entries of ``fc1.bias`` (when present)
    are scaled with them, so weight and bias stay consistent even when
    ``fc1`` has more outputs than there are scales.

    Args:
        fc1 (nn.Linear): The first fully-connected layer to be scaled.
        fc2 (nn.Linear): The second fully-connected layer to be scaled.
        scales (torch.Tensor): A 1D tensor of size (num_features,).

    Raises:
        AssertionError: If ``fc1`` or ``fc2`` is not an ``nn.Linear``, or if
            any parameter contains NaN after scaling.
    """
    assert isinstance(fc1, nn.Linear)
    assert isinstance(fc2, nn.Linear)

    scales = scales.to(fc1.weight.device)
    n_scaled = scales.size(0)

    fc1.weight[-n_scaled:].div_(scales.view(-1, 1))
    if fc1.bias is not None:
        # Scale exactly the bias entries that belong to the rows scaled above.
        fc1.bias[-n_scaled:].div_(scales.view(-1))

    fc2.weight.mul_(scales.view(1, -1))

    # Sanity check: the in-place scaling must not have produced NaNs.
    for p in fc1.parameters():
        assert torch.isnan(p).sum() == 0
    for p in fc2.parameters():
        assert torch.isnan(p).sum() == 0
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@torch.no_grad()
def scale_gelu_fc(gelu, fc, scales):
    """
    Multiply the weight of the fully-connected layer that follows a GELU
    activation by ``scales``, in place.

    The activation module itself is only type-checked here; it is not
    modified by this function.

    Args:
        gelu (Union[nn.GELU, BloomGelu, GELUActivation]): The GELU activation
            module that precedes ``fc``.
        fc (nn.Linear): The fully-connected layer to be scaled.
        scales (torch.Tensor): A 1D tensor of size (num_features,).

    Raises:
        AssertionError: If ``gelu`` is not an ``nn.GELU``, ``BloomGelu`` or
            ``GELUActivation``, if ``fc`` is not an ``nn.Linear``, or if any
            parameter of ``fc`` contains NaN after scaling.
    """
    supported_gelu_types = (nn.GELU, BloomGelu, GELUActivation)
    assert isinstance(gelu, supported_gelu_types)
    assert isinstance(fc, nn.Linear)

    column_scales = scales.view(1, -1).to(fc.weight.device)
    fc.weight.mul_(column_scales)

    # Sanity check: the in-place scaling must not have produced NaNs.
    for param in fc.parameters():
        nan_count = torch.isnan(param).sum()
        assert nan_count == 0
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def apply_scale(module, scales_list, input_feat_dict=None):
    """
    Apply pre-computed scales to the layers of ``module``, choosing the
    scaling routine from the type of each preceding operation.

    For every entry the preceding op and the target layers are looked up by
    name, moved to the GPU when CUDA is available, scaled, and moved back to
    the CPU. The dispatch is:

    * ``nn.Linear``: ``scale_fc_fc`` (exactly one target layer expected).
    * ``nn.LayerNorm`` / ``LlamaRMSNorm`` / any class whose name contains
      "rmsnorm": ``scale_ln_fcs``.
    * ``nn.GELU`` / ``BloomGelu`` / ``GELUActivation``: the activation is
      replaced by a ``ScaledActivation`` wrapper and the first target layer
      is scaled with ``scale_gelu_fc``.

    Args:
        module (nn.Module): The module containing the layers to be scaled.
        scales_list (List[Tuple[str, List[str], torch.Tensor]]): A list of
            tuples containing:
            * prev_op_name (str): Name of the preceding operation.
            * layer_names (List[str]): Names of the layers to be scaled.
            * scales (torch.Tensor): A 1D tensor of size (num_features,).
        input_feat_dict (Optional[Dict[str, torch.Tensor]]): Optional mapping
            from layer name to input features; each listed layer's entry is
            divided in place by ``scales.view(1, -1)``.

    Raises:
        NotImplementedError: If the preceding op is of an unsupported type.
        ValueError: If a named op cannot be found in ``module``.
    """
    # Use the GPU only when one is present so CPU-only hosts can still run.
    use_cuda = torch.cuda.is_available()

    for prev_op_name, layer_names, scales in scales_list:
        prev_op = get_op_by_name(module, prev_op_name)
        layers = [get_op_by_name(module, name) for name in layer_names]

        if use_cuda:
            prev_op.cuda()
            for layer in layers:
                layer.cuda()
        # The scales tensor is not moved here: Tensor.cuda()/cpu() return a
        # copy rather than moving in place, and each helper below already
        # sends the scales to the device of the weights it touches.

        if isinstance(prev_op, nn.Linear):
            assert len(layers) == 1
            scale_fc_fc(prev_op, layers[0], scales)
        elif isinstance(prev_op, (nn.LayerNorm, LlamaRMSNorm)) or "rmsnorm" in str(prev_op.__class__).lower():
            scale_ln_fcs(prev_op, layers, scales)
        elif isinstance(prev_op, (nn.GELU, BloomGelu, GELUActivation)):
            new_module = ScaledActivation(prev_op, scales)
            set_op_by_name(module, prev_op_name, new_module)
            scale_gelu_fc(prev_op, layers[0], scales)
        else:
            raise NotImplementedError(f"prev_op {type(prev_op)} not supported yet!")

        # apply the scaling to input feat if given; prepare it for clipping
        if input_feat_dict is not None:
            for layer_name in layer_names:
                inp = input_feat_dict[layer_name]
                inp.div_(scales.view(1, -1).to(inp.device))

        prev_op.cpu()
        for layer in layers:
            layer.cpu()
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
@torch.no_grad()
def apply_clip(module, clip_list):
    """
    Clamp the weights of named layers to a symmetric range, in place.

    For each entry the layer weight is reshaped to
    ``(max_val.shape[0], max_val.shape[1], -1)``, clamped element-wise to
    ``[-max_val, max_val]``, and reshaped back to its original shape. The
    layer is moved to the GPU for the operation when CUDA is available and
    is always moved back to the CPU afterwards.

    Args:
        module (nn.Module): The module containing the layers to be clipped.
        clip_list (List[Tuple[str, torch.Tensor]]): A list of tuples containing:
            * name (str): Name of the layer to be clipped, as reported by
              ``module.named_modules()``.
            * max_val (torch.Tensor): Bound applied symmetrically to the
              layer's weight; its first two dimensions define the grouping
              used for the reshape.

    Raises:
        ValueError: If a named layer cannot be found in ``module``.
    """
    # Use the GPU only when one is present so CPU-only hosts can still run.
    use_cuda = torch.cuda.is_available()

    for name, max_val in clip_list:
        layer = get_op_by_name(module, name)
        if use_cuda:
            layer.cuda()
        max_val = max_val.to(layer.weight.device)
        org_shape = layer.weight.shape
        layer.weight.data = layer.weight.data.reshape(*max_val.shape[:2], -1)
        layer.weight.data = torch.clamp(layer.weight.data, -max_val, max_val)
        layer.weight.data = layer.weight.data.reshape(org_shape)
        layer.cpu()
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def add_scale_weights(model_path, scale_path, tmp_path):
    """
    Apply pre-computed Activation-aware Weight Quantization (AWQ) results to
    a model and save the result together with its tokenizer files.

    The model is loaded from ``model_path``, the "scale" and "clip" entries
    of the AWQ results are applied to it, the model is saved to ``tmp_path``,
    and every regular file in ``model_path`` whose name starts with
    "tokenizer" is copied next to it.

    Args:
        model_path (str): Path to the pre-trained model to be equipped with AWQ.
        scale_path (str): Path to the AWQ results (.pt file).
        tmp_path (str): Directory where the equipped model will be saved.
    """
    # Local imports keep this block self-contained.
    import shutil
    from pathlib import Path

    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_path, config=config, trust_remote_code=True
    )
    model.eval()
    # NOTE(review): torch.load unpickles the file, so only load AWQ results
    # that come from a trusted source.
    awq_results = torch.load(str(scale_path), map_location="cpu")
    apply_scale(model, awq_results["scale"])
    apply_clip(model, awq_results["clip"])
    model.save_pretrained(str(tmp_path))

    # Copy the tokenizer files without going through a shell: this works on
    # platforms without `cp`, handles paths containing spaces, and surfaces
    # copy failures as exceptions instead of an ignored exit status.
    for tokenizer_file in Path(str(model_path)).glob("tokenizer*"):
        if tokenizer_file.is_file():
            shutil.copy(str(tokenizer_file), str(tmp_path))
|