llama-cpp-python 0.2.24__tar.gz → 0.2.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_cpp_python-0.2.26/.git/FETCH_HEAD +1 -0
- llama_cpp_python-0.2.26/.git/HEAD +1 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/config +1 -1
- llama_cpp_python-0.2.26/.git/index +0 -0
- llama_cpp_python-0.2.26/.git/logs/HEAD +1 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/HEAD +1 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/config +1 -1
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/logs/HEAD +2 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/logs/refs/heads/master +1 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +1 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/objects/pack/pack-0111b9b091eb3bb79082a11785b10794d01ad8e7.idx +0 -0
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/objects/pack/pack-b630c306381137c42fbadc02d9f2c158165b39b1.pack → llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/objects/pack/pack-0111b9b091eb3bb79082a11785b10794d01ad8e7.pack +0 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/objects/pack/pack-0111b9b091eb3bb79082a11785b10794d01ad8e7.rev +0 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/packed-refs +2 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/refs/heads/master +1 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/refs/tags/b1708 +1 -0
- llama_cpp_python-0.2.26/.git/modules/vendor/llama.cpp/shallow +1 -0
- llama_cpp_python-0.2.26/.git/objects/01/1c3630f5a130505458c29d58f1654d5efba3bf +0 -0
- llama_cpp_python-0.2.26/.git/objects/0d/870969f4b23bd92a09ec29134d3fb454d38bec +0 -0
- llama_cpp_python-0.2.26/.git/objects/22/8e3b99b75ebb836a7e001d2c1316eb7799f3de +0 -0
- llama_cpp_python-0.2.26/.git/objects/2e/ead7f8523a78a1f4d8c776de42d230265bcf37 +0 -0
- llama_cpp_python-0.2.26/.git/objects/34/6b4631ebd1f4af85e9988d4a528e00edba6375 +0 -0
- llama_cpp_python-0.2.26/.git/objects/46/6e2cf6a1f3a75ef800a56f5fd5e85090a98786 +0 -0
- llama_cpp_python-0.2.26/.git/objects/5d/b46643482a53bdce74c1e51b4ff3944dcf866b +0 -0
- llama_cpp_python-0.2.26/.git/objects/75/800c0d80955f2bc82409ca16eda2b0dc405757 +0 -0
- llama_cpp_python-0.2.26/.git/objects/7f/2e3d8c1e2a505f69b12024b9cc80712f447cdd +0 -0
- llama_cpp_python-0.2.24/.git/objects/3d/07614e35e03d55630abf4e92857441fdcaf91f → llama_cpp_python-0.2.26/.git/objects/80/7b0f57a8a873e58ade0ff0f5b0bcf0ff66b7f9 +0 -0
- llama_cpp_python-0.2.26/.git/objects/8e/32d2c0edce725a47b5845463133919cd766a61 +0 -0
- llama_cpp_python-0.2.26/.git/objects/91/78a22255fef8acfe298d0f27640f65f647d400 +0 -0
- llama_cpp_python-0.2.26/.git/objects/97/fe6e180b574c24eb4f07ef229981a3ac478bb7 +0 -0
- llama_cpp_python-0.2.26/.git/objects/9d/3d3559849603efda6f3c8181684e4d19e0ec79 +0 -0
- llama_cpp_python-0.2.26/.git/objects/b9/373b7ac641e6e9c8d8cc64951139205d91d8bc +0 -0
- llama_cpp_python-0.2.26/.git/objects/c5/4e4eb5ce2636abd78df46a7616cfe9196a1198 +0 -0
- llama_cpp_python-0.2.26/.git/objects/c8/4fd04498c2fb188ff7c2a59473035fc90eb990 +0 -0
- llama_cpp_python-0.2.26/.git/objects/cd/351ba33849dcf6af35b493f7405962fa1625d4 +0 -0
- llama_cpp_python-0.2.26/.git/objects/d3/efb3a6fe1e001db62ec08e5d31ce1d08567045 +0 -0
- llama_cpp_python-0.2.24/.git/objects/5d/f12aaf53a0e85f55e1aa0e5167bc831ab32783 → llama_cpp_python-0.2.26/.git/objects/eb/0fb9662e690d0f9de4632cddd321b3f872a725 +0 -0
- llama_cpp_python-0.2.26/.git/objects/f0/827d762e852a21f6406c469300899d5f509b8f +0 -0
- llama_cpp_python-0.2.26/.git/objects/fa/dfc5fb4fe6f5eb6d5d98b62519e374a5202b00 +0 -0
- llama_cpp_python-0.2.26/.git/refs/tags/v0.2.26 +1 -0
- llama_cpp_python-0.2.26/.git/shallow +1 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.github/ISSUE_TEMPLATE/bug_report.md +1 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/CHANGELOG.md +13 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/PKG-INFO +4 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/README.md +3 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docs/server.md +95 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/llama_cpp/__init__.py +1 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/llama_cpp/llama.py +18 -10
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/llama_cpp/llama_cpp.py +38 -22
- llama_cpp_python-0.2.26/llama_cpp/server/__main__.py +88 -0
- llama_cpp_python-0.2.26/llama_cpp/server/app.py +350 -0
- llama_cpp_python-0.2.26/llama_cpp/server/cli.py +97 -0
- llama_cpp_python-0.2.26/llama_cpp/server/errors.py +210 -0
- llama_cpp_python-0.2.26/llama_cpp/server/model.py +126 -0
- llama_cpp_python-0.2.26/llama_cpp/server/settings.py +167 -0
- llama_cpp_python-0.2.26/llama_cpp/server/types.py +266 -0
- llama_cpp_python-0.2.26/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md +9 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/docker.yml +32 -2
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.gitignore +1 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/CMakeLists.txt +6 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/Makefile +29 -9
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/README.md +23 -7
- llama_cpp_python-0.2.26/vendor/llama.cpp/awq-py/README.md +116 -0
- llama_cpp_python-0.2.26/vendor/llama.cpp/awq-py/awq/apply_awq.py +254 -0
- llama_cpp_python-0.2.26/vendor/llama.cpp/awq-py/requirements.txt +2 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/build-info.cpp +1 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/common.cpp +1 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/common.h +2 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/sampling.cpp +50 -6
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/sampling.h +3 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/convert-hf-to-gguf.py +109 -4
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/convert.py +20 -4
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/CMakeLists.txt +1 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/baby-llama/baby-llama.cpp +3 -12
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/export-lora/export-lora.cpp +1 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/finetune/finetune.cpp +27 -29
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/gguf/gguf.cpp +0 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/clip.cpp +1 -7
- llama_cpp_python-0.2.26/vendor/llama.cpp/examples/lookup/CMakeLists.txt +5 -0
- llama_cpp_python-0.2.26/vendor/llama.cpp/examples/lookup/README.md +13 -0
- llama_cpp_python-0.2.26/vendor/llama.cpp/examples/lookup/lookup.cpp +230 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/README.md +2 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/server.cpp +44 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +5 -9
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-alloc.c +13 -5
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-backend-impl.h +12 -8
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-backend.c +81 -15
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-backend.h +7 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-cuda.cu +697 -482
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-metal.h +3 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-metal.m +190 -44
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-quants.c +25 -344
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml.c +32 -47
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml.h +14 -10
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/README.md +1 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/constants.py +20 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/tensor_mapping.py +28 -14
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/vocab.py +1 -1
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/llama.cpp +838 -750
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/llama.h +7 -3
- llama_cpp_python-0.2.26/vendor/llama.cpp/scripts/sync-ggml-am.sh +131 -0
- llama_cpp_python-0.2.26/vendor/llama.cpp/scripts/sync-ggml.last +1 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-backend-ops.cpp +5 -4
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-grad0.cpp +5 -8
- llama_cpp_python-0.2.24/.git/FETCH_HEAD +0 -1
- llama_cpp_python-0.2.24/.git/HEAD +0 -1
- llama_cpp_python-0.2.24/.git/index +0 -0
- llama_cpp_python-0.2.24/.git/logs/HEAD +0 -1
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/HEAD +0 -1
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/logs/HEAD +0 -2
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/logs/refs/heads/master +0 -1
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +0 -1
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/objects/pack/pack-b630c306381137c42fbadc02d9f2c158165b39b1.idx +0 -0
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/objects/pack/pack-b630c306381137c42fbadc02d9f2c158165b39b1.rev +0 -0
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/packed-refs +0 -2
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/refs/heads/master +0 -1
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/refs/tags/b1660 +0 -1
- llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/shallow +0 -1
- llama_cpp_python-0.2.24/.git/objects/00/031ba49010ec116b38b81ec4f6902d35e44313 +0 -0
- llama_cpp_python-0.2.24/.git/objects/1c/e1f0b7c1c9711fd12aa10073b82dc0a156a384 +0 -0
- llama_cpp_python-0.2.24/.git/objects/2f/413db166489b25b74d502e2acc4f6c54269247 +0 -0
- llama_cpp_python-0.2.24/.git/objects/45/fc5a8579674a501e1772722b3e1e2c775dd01f +0 -0
- llama_cpp_python-0.2.24/.git/objects/4e/1e5622839e64c024cfbfb9beb903d8ed75849f +0 -0
- llama_cpp_python-0.2.24/.git/objects/4f/341a9a49ea26463a218bb3f3b178ba5c7495be +0 -0
- llama_cpp_python-0.2.24/.git/objects/50/df3282f98d691e83e44ef9eca34a9f01f3a769 +0 -0
- llama_cpp_python-0.2.24/.git/objects/53/8e3ff160a5b155620234989336931e2e9f35b7 +0 -0
- llama_cpp_python-0.2.24/.git/objects/5c/f12f19f63a787b5ca00ef8ed698397dd6bfdbf +0 -0
- llama_cpp_python-0.2.24/.git/objects/5e/863d8a3bf9384f5574166385d63b6896d929a6 +0 -1
- llama_cpp_python-0.2.24/.git/objects/90/fdf9b2be7bbfb62c2b1463d6c95cc2d26ebb5c +0 -0
- llama_cpp_python-0.2.24/.git/objects/a8/62a27c09eba0394770472bc9c6e7f6ea6243df +0 -0
- llama_cpp_python-0.2.24/.git/objects/e9/48f8deb150039c6853e14537a8dd0cc9002b72 +0 -0
- llama_cpp_python-0.2.24/.git/objects/f1/698ab834de40dfe365a8c198f0349a960ec4c7 +0 -0
- llama_cpp_python-0.2.24/.git/objects/fa/39047f8f65be84af6bd5ebad4f976912d3c4e0 +0 -0
- llama_cpp_python-0.2.24/.git/refs/tags/v0.2.24 +0 -1
- llama_cpp_python-0.2.24/.git/shallow +0 -1
- llama_cpp_python-0.2.24/llama_cpp/server/__main__.py +0 -101
- llama_cpp_python-0.2.24/llama_cpp/server/app.py +0 -912
- llama_cpp_python-0.2.24/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md +0 -184
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.dockerignore +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/description +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/info/exclude +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/description +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/info/exclude +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/modules/vendor/llama.cpp/refs/remotes/origin/HEAD +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/03/7f96a2dde3c50a29a26eaacf577f4ecac7c027 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/04/5856ea2ffe697ec33db1c1c989bd45cde5bb3d +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/04/7bc14424303575f73af90611fec827334f54e8 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/0e/2a907c0e40de1a432ee0cd2e3d01c634df1a37 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/0e/f132b07175867c07ad06fa22ca6b95eca67b59 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/11/2a0f7872fa9244bf38729a2722dc5c08dec20c +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/11/b6d5c26e3f7157658952b8ec353e985d522fac +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/13/c5b6b0df5f67e80cbe584909b83777901265a1 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/17/1f357f53b0de535157a2c9b4de04582784ec97 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/19/9bd4ffbf88c68c98b52c05f388dfa92716f6b7 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/23/e37d4d40e5ec0bfd85b5e928834d58e2cf0da6 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/24/04228325d88c59b819acaedb20b15635f75df9 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/27/2e4767b47397d1529e6a2d01298144d9d746ba +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/27/a6b1e5042318f85dc99c9f24f4dd6fc472d242 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/29/03e0146d304bcacbfabfe71f171a2edc03043e +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/2c/c6fb02dcb3226c1ecf0fbed6453eb3acadeaa2 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/2e/24903c21db75253911a70039ac5622672db813 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/39/697bfc2538bd4558018d0d721d6d6028c4bb56 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/3a/1d7180d508818fe957923e00dcd8950938632d +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/3a/6457dcdfd47e764654bacae0ba8347976b645a +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/41/cc68ea2402cf682807649d841e7c0f4175db01 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/44/b6d4a35d3caf0c65aef0a1c77ff0ab4077d405 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/47/4503fdfc554d8caabee4f321a80427f8c7d696 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/4b/3189dd1a54be3bc416786ddf184dd047dc4b20 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/51/f357200f8b998031f4be924e11ed2ae4bf3fea +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/55/d08db5f514fb6847938e3d6489b99f737ba6e3 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/56/2410fe1a4c4093d62e1705933eaf66c6d99a2c +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/5b/51e98ce432974ff031367f8937babe755e3d73 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/5f/350ffe99abe7297b2839f36fd0eaeb98887805 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/60/bc7aef42aac0409cfdca666ad2ff6f516d7b5b +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/61/027ef99725c50b0891fdbf0bf263a33abe648f +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/68/7316b327ca038d26a338b3070a7d4698322515 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/74/7c6130e3cb1479d20e2013b1dd3db3379c2266 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/76/e26fbd106895fba52861f8ac1e11cc6ee2a307 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/79/5dad726ae91f330c56e49bc188080b5b3b5ba9 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/79/a9e67a1aee09c6d182f240ba5eef32feabcbce +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/7a/7b899ec7dd48d192cba14ac9c8e9df4233f7fb +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/7d/5498f9d2c49c1f731b47845d845f6c5e16a3d9 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/7d/6c970483161eaf43cfa9d50010c071d4953053 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/7e/df0975dc12ccc95ad14de085f07efe6d65c620 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/7e/e8f748eab47180cea09c0ad8e75c3b991b4af4 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/82/e5c4487e57d6d59c901a73bdd2a9bc172fee7c +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/8b/d03f88a1895cbf3ef249e13df79ee0efda779d +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/91/abb11fdf507883caeeb2d2958e1c65fb6cbdc1 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/99/32d6130f5552a9b85c8b15b4ac6bc26b1068ce +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/9d/0ec2f705618e591cfa8d6512cb9a96b3da75f1 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/a9/e51cdc1672134ec9af66c9eccf09f6da4ceccd +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/ac/82b8fbb81087ec9b3a72d9e377102a31b28d16 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/ae/c023cd61b3bb83bac968fffdd6aa0810af7c3d +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/af/f397f476fb7773d0e89b0e8913c8b1f97ca3e4 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/b1/f90b91594f496ad9f27b1a68584984f4b523d2 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/b5/affaa9d6087f3888dd9eedea209bb214b6e135 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/b9/1632f5bc787c1404600c894a6a4126359747d8 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/bb/cbbe7d61558adde3cbfd0c7a63a67c27ed6d30 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/c8/5e73d2b657bb05ed99309615d67bac93d9f86e +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/ca/e7ebb7a833dafcd402a96bea3a9574f74f0ed5 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/ca/ebbb67fdb02a0a8897d4e4826ea046a9931f6f +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/cc/6a3a7252ea6e698614f0629d4bc040ab6ca717 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/d3/329eec3bac6ce7e54c76b77ac9bf99fab0fe3f +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/da/c33b74dddf06fcfc01244044eebb102cfcea37 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/e2/13518b95011cb6ee783986624c3b6de8659f81 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/e4/1f375774e6945e445bfb179502b128fe22dda7 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/e9/30609ff5c479a1d5e12a8f3993582d421a6326 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/eb/9a2cfa9167df02f136502af79738c71363abfd +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/ef/1b2c0162e8edd321e2b9c1ce375d96f1f1d048 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/f1/b8e9d154231932c4b7b9b59611626764e68632 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/f2/0813765a70679e8a063871c9ef75d75c65ccb6 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/fc/25ff5160028dee3570249abc40cd57780bcca9 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/fc/ef8cd800ee8a265b146748d178cb56b5632bf3 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/fd/64c09b37947c97e58903ce570785c657d56722 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/fe/b0ed68d94eac48b844fd587ddfb808649716a1 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.git/objects/ff/3e950cd1110fe552912cea4c268c4023d2b737 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.github/dependabot.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.github/workflows/build-and-release.yaml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.github/workflows/build-docker.yaml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.github/workflows/publish-to-test.yaml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.github/workflows/publish.yaml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.github/workflows/test-pypi.yaml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.github/workflows/test.yaml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.gitignore +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.gitmodules +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/.readthedocs.yaml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/LICENSE.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/Makefile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docker/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docker/cuda_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docker/open_llama/Dockerfile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docker/open_llama/build.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docker/open_llama/hug_model.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docker/open_llama/start.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docker/open_llama/start_server.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docker/openblas_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docker/simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docker/simple/run.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docs/api-reference.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docs/changelog.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docs/index.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docs/install/macos.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/docs/requirements.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/high_level_api/fastapi_server.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/high_level_api/high_level_api_embedding.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/high_level_api/high_level_api_inference.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/high_level_api/high_level_api_streaming.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/high_level_api/langchain_custom_llm.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/low_level_api/Chat.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/low_level_api/Miku.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/low_level_api/ReasonAct.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/low_level_api/common.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/low_level_api/low_level_api_chat_cpp.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/low_level_api/low_level_api_llama_cpp.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/low_level_api/quantize.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/low_level_api/readme/low_level_api_llama_cpp.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/low_level_api/util.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/notebooks/Batching.ipynb +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/notebooks/Clients.ipynb +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/notebooks/Functions.ipynb +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/notebooks/Guidance.ipynb +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/notebooks/Multimodal.ipynb +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/examples/notebooks/PerformanceTuning.ipynb +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/llama_cpp/_utils.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/llama_cpp/llama_chat_format.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/llama_cpp/llama_grammar.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/llama_cpp/llama_types.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/llama_cpp/llava_cpp.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/llama_cpp/py.typed +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/llama_cpp/server/__init__.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/mkdocs.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/pyproject.toml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/tests/test_grammar.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/tests/test_llama.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.clang-tidy +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/cloud-v-pipeline +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/full-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/full-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/main-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/main-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.devops/tools.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.dockerignore +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.ecrc +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.editorconfig +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.flake8 +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.git +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/enhancement.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/build.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/code-coverage.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/editorconfig.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/gguf-publish.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/python-lint.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/tidy-post.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/tidy-review.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.github/workflows/zig-build.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/.pre-commit-config.yaml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/LICENSE +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/Package.swift +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/SHA256SUMS +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/build.zig +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ci/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ci/run.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/cmake/FindSIMD.cmake +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/codecov.yml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/base64.hpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/build-info.cpp.in +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/console.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/console.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/grammar-parser.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/log.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/stb_image.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/train.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/common/train.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/convert-llama-ggml-to-gguf.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/convert-lora-to-ggml.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/convert-persimmon-to-gguf.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/docs/BLIS.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/docs/llama-star/idea-arch.key +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/docs/llama-star/idea-arch.pdf +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/docs/token_generation_performance_tips.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/Miku.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/alpaca.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched/batched.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched-bench/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched-bench/batched-bench.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched.swift/.gitignore +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched.swift/Makefile +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched.swift/Package.swift +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched.swift/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/batched.swift/Sources/main.swift +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/beam-search/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/beam-search/beam-search.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/benchmark/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/chat-13B.bat +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/chat-13B.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/chat-persistent.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/chat-vicuna.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/chat.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/embedding/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/embedding/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/embedding/embedding.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/export-lora/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/export-lora/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/finetune/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/finetune/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/finetune/convert-finetune-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/finetune/finetune.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/gpt4all.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/infill/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/infill/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/infill/infill.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/jeopardy/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/jeopardy/graph.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/jeopardy/jeopardy.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/jeopardy/qasheet.csv +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/jeopardy/questions.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/json-schema-to-grammar.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama-bench/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama-bench/llama-bench.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/.gitignore +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/bridging-header.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AccentColor.colorset/Contents.json +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Preview Content/Preview Assets.xcassets/Contents.json +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama.vim +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama2-13b.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llama2.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/clip.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/convert-image-encoder-to-gguf.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/llava-cli.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/llava-surgery.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/llava.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llava/llava.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/llm.vim +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/lookahead/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/lookahead/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/lookahead/lookahead.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main/main.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main-cmake-pkg/.gitignore +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/main-cmake-pkg/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/make-ggml.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/metal/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/metal/metal.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/parallel/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/parallel/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/parallel/parallel.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/perplexity/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/perplexity/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/perplexity/perplexity.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/quantize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/quantize/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/quantize/quantize.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/quantize-stats/quantize-stats.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/reason-act.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/api_like_OAI.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/chat-llama2.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/chat.mjs +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/chat.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/completion.js.hpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/deps.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/httplib.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/index.html.hpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/index.js.hpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/json-schema-to-grammar.mjs.hpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/json.hpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/public/completion.js +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/public/index.html +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/public/index.js +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server/public/json-schema-to-grammar.mjs +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/server-llama2-13B.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/simple/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/simple/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/simple/simple.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/speculative/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/speculative/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/speculative/speculative.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/tokenize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/tokenize/tokenize.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/train-text-from-scratch/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/flake.lock +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/flake.nix +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-alloc.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-cuda.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-impl.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-metal.metal +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-mpi.c +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-mpi.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-opencl.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-opencl.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/ggml-quants.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/LICENSE +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/examples/writer.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/__init__.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/gguf.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/gguf_reader.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/gguf_writer.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/gguf/py.typed +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/pyproject.toml +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/scripts/__init__.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/scripts/gguf-convert-endian.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/scripts/gguf-dump.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/scripts/gguf-set-metadata.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/gguf-py/tests/test_gguf.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/README.md +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/arithmetic.gbnf +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/c.gbnf +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/chess.gbnf +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/japanese.gbnf +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/json.gbnf +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/json_arr.gbnf +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/grammars/list.gbnf +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/media/llama-leader.jpeg +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/media/llama0-banner.png +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/media/llama0-logo.png +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/media/llama1-banner.png +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/media/llama1-logo.png +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/mypy.ini +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/pocs/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/pocs/vdot/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/pocs/vdot/q8dot.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/pocs/vdot/vdot.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/LLM-questions.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/assistant.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat-with-baichuan.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat-with-qwen.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/chat.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/dan-modified.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/dan.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/mnemonics.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/parallel-questions.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/prompts/reason-act.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/requirements-hf-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/requirements.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/run_with_preset.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/LlamaConfig.cmake.in +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/build-info.cmake +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/build-info.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/convert-gg.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/get-flags.mk +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/get-wikitext-2.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/qnt-all.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/run-all-perf.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/run-all-ppl.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/server-llm.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/sync-ggml.sh +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/scripts/verify-checksum-models.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/spm-headers/ggml.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/spm-headers/llama.h +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-c.c +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-double-float.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-llama-grammar.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-opt.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-quantize-fns.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-quantize-perf.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-rope.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-sampling.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-0-llama.py +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/tests/test-tokenizer-1-llama.cpp +0 -0
- {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.26}/vendor/llama.cpp/unicode.h +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
011c3630f5a130505458c29d58f1654d5efba3bf '011c3630f5a130505458c29d58f1654d5efba3bf' of https://github.com/abetlen/llama-cpp-python
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
011c3630f5a130505458c29d58f1654d5efba3bf
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
[gc]
|
|
10
10
|
auto = 0
|
|
11
11
|
[http "https://github.com/"]
|
|
12
|
-
extraheader = AUTHORIZATION: basic
|
|
12
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX1U4WUFIMlNCR2xPQ1NjYm1HcTR5ZlZod0ZKeEFUQjJxSmllTw==
|
|
13
13
|
[submodule "vendor/llama.cpp"]
|
|
14
14
|
active = true
|
|
15
15
|
url = https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 011c3630f5a130505458c29d58f1654d5efba3bf runner <runner@fv-az532-920.yhfsaq54z0vebhuvdla3z0z0vh.cx.internal.cloudapp.net> 1703716575 +0000 checkout: moving from master to refs/tags/v0.2.26
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
[gc]
|
|
14
14
|
auto = 0
|
|
15
15
|
[http "https://github.com/"]
|
|
16
|
-
extraheader = AUTHORIZATION: basic
|
|
16
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX1U4WUFIMlNCR2xPQ1NjYm1HcTR5ZlZod0ZKeEFUQjJxSmllTw==
|
|
17
17
|
[url "https://github.com/"]
|
|
18
18
|
insteadOf = git@github.com:
|
|
19
19
|
insteadOf = org-6826477@github.com:
|
|
Binary file
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 f6793491b5af6da75edad34d6f503ef86d31b09f runner <runner@fv-az532-920.yhfsaq54z0vebhuvdla3z0z0vh.cx.internal.cloudapp.net> 1703716576 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
2
|
+
f6793491b5af6da75edad34d6f503ef86d31b09f f6793491b5af6da75edad34d6f503ef86d31b09f runner <runner@fv-az532-920.yhfsaq54z0vebhuvdla3z0z0vh.cx.internal.cloudapp.net> 1703716576 +0000 checkout: moving from master to f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 f6793491b5af6da75edad34d6f503ef86d31b09f runner <runner@fv-az532-920.yhfsaq54z0vebhuvdla3z0z0vh.cx.internal.cloudapp.net> 1703716576 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 f6793491b5af6da75edad34d6f503ef86d31b09f runner <runner@fv-az532-920.yhfsaq54z0vebhuvdla3z0z0vh.cx.internal.cloudapp.net> 1703716576 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
011c3630f5a130505458c29d58f1654d5efba3bf
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
011c3630f5a130505458c29d58f1654d5efba3bf
|
|
@@ -64,7 +64,7 @@ Try the following:
|
|
|
64
64
|
1. `git clone https://github.com/abetlen/llama-cpp-python`
|
|
65
65
|
2. `cd llama-cpp-python`
|
|
66
66
|
3. `rm -rf _skbuild/` # delete any old builds
|
|
67
|
-
4. `python
|
|
67
|
+
4. `python -m pip install .`
|
|
68
68
|
5. `cd ./vendor/llama.cpp`
|
|
69
69
|
6. Follow [llama.cpp's instructions](https://github.com/ggerganov/llama.cpp#build) to `cmake` llama.cpp
|
|
70
70
|
7. Run llama.cpp's `./main` with the same arguments you previously passed to llama-cpp-python and see if you can reproduce the issue. If you can, [log an issue with llama.cpp](https://github.com/ggerganov/llama.cpp/issues)
|
|
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.26]
|
|
11
|
+
|
|
12
|
+
- feat: Update llama.cpp to ggerganov/llama.cpp@f6793491b5af6da75edad34d6f503ef86d31b09f
|
|
13
|
+
|
|
14
|
+
## [0.2.25]
|
|
15
|
+
|
|
16
|
+
- feat(server): Multi model support by @D4ve-R in #931
|
|
17
|
+
- feat(server): Support none defaulting to infinity for completions by @swg in #111
|
|
18
|
+
- feat(server): Implement openai api compatible authentication by @docmeth2 in #1010
|
|
19
|
+
- fix: text_offset of multi-token characters by @twaka in #1037
|
|
20
|
+
- fix: ctypes bindings for kv override by @phiharri in #1011
|
|
21
|
+
- fix: ctypes definitions of llama_kv_cache_view_update and llama_kv_cache_view_free. by @e-c-d in #1028
|
|
22
|
+
|
|
10
23
|
## [0.2.24]
|
|
11
24
|
|
|
12
25
|
- feat: Update llama.cpp to ggerganov/llama.cpp@0e18b2e7d0b5c0a509ea40098def234b8d4a938a
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: llama_cpp_python
|
|
3
|
-
Version: 0.2.24
|
|
3
|
+
Version: 0.2.26
|
|
4
4
|
Summary: Python bindings for the llama.cpp library
|
|
5
5
|
Author-Email: Andrei Betlen <abetlen@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -60,11 +60,13 @@ This package provides:
|
|
|
60
60
|
- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
|
|
61
61
|
- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
|
|
62
62
|
- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
|
|
63
|
+
- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
|
|
63
64
|
|
|
64
65
|
Documentation is available at [https://llama-cpp-python.readthedocs.io/en/latest](https://llama-cpp-python.readthedocs.io/en/latest).
|
|
65
66
|
|
|
66
67
|
|
|
67
68
|
|
|
69
|
+
|
|
68
70
|
## Installation
|
|
69
71
|
|
|
70
72
|
`llama-cpp-python` can be installed directly from PyPI as a source distribution by running:
|
|
@@ -374,6 +376,7 @@ For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_
|
|
|
374
376
|
- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
|
|
375
377
|
- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
|
|
376
378
|
- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
|
|
379
|
+
- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
|
|
377
380
|
|
|
378
381
|
## Docker image
|
|
379
382
|
|
|
@@ -18,11 +18,13 @@ This package provides:
|
|
|
18
18
|
- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
|
|
19
19
|
- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
|
|
20
20
|
- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
|
|
21
|
+
- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
|
|
21
22
|
|
|
22
23
|
Documentation is available at [https://llama-cpp-python.readthedocs.io/en/latest](https://llama-cpp-python.readthedocs.io/en/latest).
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
|
|
27
|
+
|
|
26
28
|
## Installation
|
|
27
29
|
|
|
28
30
|
`llama-cpp-python` can be installed directly from PyPI as a source distribution by running:
|
|
@@ -332,6 +334,7 @@ For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_
|
|
|
332
334
|
- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
|
|
333
335
|
- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
|
|
334
336
|
- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
|
|
337
|
+
- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
|
|
335
338
|
|
|
336
339
|
## Docker image
|
|
337
340
|
|
|
@@ -32,6 +32,12 @@ python3 -m llama_cpp.server --help
|
|
|
32
32
|
|
|
33
33
|
NOTE: All server options are also available as environment variables. For example, `--model` can be set by setting the `MODEL` environment variable.
|
|
34
34
|
|
|
35
|
+
Check out the server config reference below for more information on the available options.
|
|
36
|
+
CLI arguments and environment variables are available for all of the fields defined in [`ServerSettings`](#llama_cpp.server.settings.ServerSettings) and [`ModelSettings`](#llama_cpp.server.settings.ModelSettings).
|
|
37
|
+
|
|
38
|
+
Additionally, the server supports configuration via a config file; check out the [configuration section](#configuration-and-multi-model-support) for more information and examples.
|
|
39
|
+
|
|
40
|
+
|
|
35
41
|
## Guides
|
|
36
42
|
|
|
37
43
|
### Code Completion
|
|
@@ -121,4 +127,92 @@ response = client.chat.completions.create(
|
|
|
121
127
|
],
|
|
122
128
|
)
|
|
123
129
|
print(response)
|
|
124
|
-
```
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Configuration and Multi-Model Support
|
|
133
|
+
|
|
134
|
+
The server supports configuration via a JSON config file that can be passed using the `--config_file` parameter or the `CONFIG_FILE` environment variable.
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
python3 -m llama_cpp.server --config_file <config_file>
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Config files support all of the server and model options supported by the CLI and environment variables; however, instead of only a single model, the config file can specify multiple models.
|
|
141
|
+
|
|
142
|
+
The server supports routing requests to multiple models based on the `model` parameter in the request which matches against the `model_alias` in the config file.
|
|
143
|
+
|
|
144
|
+
At the moment only a single model is loaded into memory at a time; the server will automatically load and unload models as needed.
|
|
145
|
+
|
|
146
|
+
```json
|
|
147
|
+
{
|
|
148
|
+
"host": "0.0.0.0",
|
|
149
|
+
"port": 8080,
|
|
150
|
+
"models": [
|
|
151
|
+
{
|
|
152
|
+
"model": "models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf",
|
|
153
|
+
"model_alias": "gpt-3.5-turbo",
|
|
154
|
+
"chat_format": "chatml",
|
|
155
|
+
"n_gpu_layers": -1,
|
|
156
|
+
"offload_kqv": true,
|
|
157
|
+
"n_threads": 12,
|
|
158
|
+
"n_batch": 512,
|
|
159
|
+
"n_ctx": 2048
|
|
160
|
+
},
|
|
161
|
+
{
|
|
162
|
+
"model": "models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf",
|
|
163
|
+
"model_alias": "gpt-4",
|
|
164
|
+
"chat_format": "chatml",
|
|
165
|
+
"n_gpu_layers": -1,
|
|
166
|
+
"offload_kqv": true,
|
|
167
|
+
"n_threads": 12,
|
|
168
|
+
"n_batch": 512,
|
|
169
|
+
"n_ctx": 2048
|
|
170
|
+
},
|
|
171
|
+
{
|
|
172
|
+
"model": "models/ggml_llava-v1.5-7b/ggml-model-q4_k.gguf",
|
|
173
|
+
"model_alias": "gpt-4-vision-preview",
|
|
174
|
+
"chat_format": "llava-1-5",
|
|
175
|
+
"clip_model_path": "models/ggml_llava-v1.5-7b/mmproj-model-f16.gguf",
|
|
176
|
+
"n_gpu_layers": -1,
|
|
177
|
+
"offload_kqv": true,
|
|
178
|
+
"n_threads": 12,
|
|
179
|
+
"n_batch": 512,
|
|
180
|
+
"n_ctx": 2048
|
|
181
|
+
},
|
|
182
|
+
{
|
|
183
|
+
"model": "models/mistral-7b-v0.1-GGUF/ggml-model-Q4_K.gguf",
|
|
184
|
+
"model_alias": "text-davinci-003",
|
|
185
|
+
"n_gpu_layers": -1,
|
|
186
|
+
"offload_kqv": true,
|
|
187
|
+
"n_threads": 12,
|
|
188
|
+
"n_batch": 512,
|
|
189
|
+
"n_ctx": 2048
|
|
190
|
+
},
|
|
191
|
+
{
|
|
192
|
+
"model": "models/replit-code-v1_5-3b-GGUF/replit-code-v1_5-3b.Q4_0.gguf",
|
|
193
|
+
"model_alias": "copilot-codex",
|
|
194
|
+
"n_gpu_layers": -1,
|
|
195
|
+
"offload_kqv": true,
|
|
196
|
+
"n_threads": 12,
|
|
197
|
+
"n_batch": 1024,
|
|
198
|
+
"n_ctx": 9216
|
|
199
|
+
}
|
|
200
|
+
]
|
|
201
|
+
}
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
The config file format is defined by the [`ConfigFileSettings`](#llama_cpp.server.settings.ConfigFileSettings) class.
|
|
205
|
+
|
|
206
|
+
## Server Options Reference
|
|
207
|
+
|
|
208
|
+
::: llama_cpp.server.settings.ConfigFileSettings
|
|
209
|
+
options:
|
|
210
|
+
show_if_no_docstring: true
|
|
211
|
+
|
|
212
|
+
::: llama_cpp.server.settings.ServerSettings
|
|
213
|
+
options:
|
|
214
|
+
show_if_no_docstring: true
|
|
215
|
+
|
|
216
|
+
::: llama_cpp.server.settings.ModelSettings
|
|
217
|
+
options:
|
|
218
|
+
show_if_no_docstring: true
|
|
@@ -850,7 +850,7 @@ class Llama:
|
|
|
850
850
|
) # 0x7FFFFFFF is INT32 max, will be auto set to all layers
|
|
851
851
|
self.model_params.main_gpu = main_gpu
|
|
852
852
|
self.tensor_split = tensor_split
|
|
853
|
-
self.
|
|
853
|
+
self._c_tensor_split = None
|
|
854
854
|
if self.tensor_split is not None:
|
|
855
855
|
if len(self.tensor_split) > llama_cpp.LLAMA_MAX_DEVICES:
|
|
856
856
|
raise ValueError(
|
|
@@ -1551,11 +1551,13 @@ class Llama:
|
|
|
1551
1551
|
"utf-8", errors="ignore"
|
|
1552
1552
|
)
|
|
1553
1553
|
text_offset = len(prompt) + len(
|
|
1554
|
-
self.detokenize(completion_tokens[:returned_tokens])
|
|
1554
|
+
self.detokenize(completion_tokens[:returned_tokens]).decode(
|
|
1555
|
+
"utf-8", errors="ignore"
|
|
1556
|
+
)
|
|
1555
1557
|
)
|
|
1556
1558
|
token_offset = len(prompt_tokens) + returned_tokens
|
|
1557
1559
|
logits = self._scores[token_offset - 1, :]
|
|
1558
|
-
current_logprobs = Llama.logits_to_logprobs(logits)
|
|
1560
|
+
current_logprobs = Llama.logits_to_logprobs(logits).tolist()
|
|
1559
1561
|
sorted_logprobs = list(
|
|
1560
1562
|
sorted(
|
|
1561
1563
|
zip(current_logprobs, range(len(current_logprobs))),
|
|
@@ -1674,7 +1676,7 @@ class Llama:
|
|
|
1674
1676
|
)
|
|
1675
1677
|
token_offset = len(prompt_tokens) + returned_tokens - 1
|
|
1676
1678
|
logits = self._scores[token_offset, :]
|
|
1677
|
-
current_logprobs = Llama.logits_to_logprobs(logits)
|
|
1679
|
+
current_logprobs = Llama.logits_to_logprobs(logits).tolist()
|
|
1678
1680
|
sorted_logprobs = list(
|
|
1679
1681
|
sorted(
|
|
1680
1682
|
zip(current_logprobs, range(len(current_logprobs))),
|
|
@@ -1789,13 +1791,19 @@ class Llama:
|
|
|
1789
1791
|
]
|
|
1790
1792
|
all_logprobs = Llama.logits_to_logprobs(self._scores)[token_offset:]
|
|
1791
1793
|
# TODO: may be able to change this loop to use np.take_along_dim
|
|
1792
|
-
for token, token_str, logprobs_token in
|
|
1793
|
-
all_tokens, all_token_strs, all_logprobs
|
|
1794
|
+
for idx, (token, token_str, logprobs_token) in enumerate(
|
|
1795
|
+
zip(all_tokens, all_token_strs, all_logprobs)
|
|
1794
1796
|
):
|
|
1795
1797
|
if token == self.token_bos():
|
|
1796
1798
|
continue
|
|
1797
|
-
text_offsets.append(
|
|
1798
|
-
|
|
1799
|
+
text_offsets.append(
|
|
1800
|
+
text_offset
|
|
1801
|
+
+ len(
|
|
1802
|
+
self.detokenize(all_tokens[:idx]).decode(
|
|
1803
|
+
"utf-8", errors="ignore"
|
|
1804
|
+
)
|
|
1805
|
+
)
|
|
1806
|
+
)
|
|
1799
1807
|
tokens.append(token_str)
|
|
1800
1808
|
sorted_logprobs = list(
|
|
1801
1809
|
sorted(
|
|
@@ -1909,7 +1917,7 @@ class Llama:
|
|
|
1909
1917
|
completion_or_chunks = self._create_completion(
|
|
1910
1918
|
prompt=prompt,
|
|
1911
1919
|
suffix=suffix,
|
|
1912
|
-
max_tokens
|
|
1920
|
+
max_tokens=-1 if max_tokens is None else max_tokens,
|
|
1913
1921
|
temperature=temperature,
|
|
1914
1922
|
top_p=top_p,
|
|
1915
1923
|
min_p=min_p,
|
|
@@ -1943,7 +1951,7 @@ class Llama:
|
|
|
1943
1951
|
self,
|
|
1944
1952
|
prompt: str,
|
|
1945
1953
|
suffix: Optional[str] = None,
|
|
1946
|
-
max_tokens: int =
|
|
1954
|
+
max_tokens: Optional[int] = 16,
|
|
1947
1955
|
temperature: float = 0.8,
|
|
1948
1956
|
top_p: float = 0.95,
|
|
1949
1957
|
min_p: float = 0.05,
|
|
@@ -9,6 +9,7 @@ from ctypes import (
|
|
|
9
9
|
c_int32,
|
|
10
10
|
c_uint8,
|
|
11
11
|
c_uint32,
|
|
12
|
+
c_int64,
|
|
12
13
|
c_size_t,
|
|
13
14
|
c_float,
|
|
14
15
|
c_double,
|
|
@@ -16,6 +17,7 @@ from ctypes import (
|
|
|
16
17
|
POINTER,
|
|
17
18
|
_Pointer, # type: ignore
|
|
18
19
|
Structure,
|
|
20
|
+
Union as CtypesUnion,
|
|
19
21
|
Array,
|
|
20
22
|
)
|
|
21
23
|
import pathlib
|
|
@@ -60,6 +62,9 @@ def _load_shared_library(lib_base_name: str):
|
|
|
60
62
|
if "CUDA_PATH" in os.environ:
|
|
61
63
|
os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin"))
|
|
62
64
|
os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "lib"))
|
|
65
|
+
if "HIP_PATH" in os.environ:
|
|
66
|
+
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin"))
|
|
67
|
+
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib"))
|
|
63
68
|
cdll_args["winmode"] = ctypes.RTLD_GLOBAL
|
|
64
69
|
|
|
65
70
|
# Try to load the shared library, handling potential errors
|
|
@@ -88,9 +93,7 @@ c_size_t_p = POINTER(c_size_t)
|
|
|
88
93
|
|
|
89
94
|
# llama.h bindings
|
|
90
95
|
|
|
91
|
-
|
|
92
|
-
GGML_CUDA_MAX_DEVICES = 16
|
|
93
|
-
LLAMA_MAX_DEVICES = GGML_CUDA_MAX_DEVICES if GGML_USE_CUBLAS else 1
|
|
96
|
+
LLAMA_MAX_DEVICES = _lib.llama_max_devices()
|
|
94
97
|
|
|
95
98
|
# define LLAMA_DEFAULT_SEED 0xFFFFFFFF
|
|
96
99
|
LLAMA_DEFAULT_SEED = 0xFFFFFFFF
|
|
@@ -252,8 +255,8 @@ class llama_token_data_array(Structure):
|
|
|
252
255
|
|
|
253
256
|
llama_token_data_array_p = POINTER(llama_token_data_array)
|
|
254
257
|
|
|
255
|
-
# typedef
|
|
256
|
-
llama_progress_callback = ctypes.CFUNCTYPE(
|
|
258
|
+
# typedef bool (*llama_progress_callback)(float progress, void *ctx);
|
|
259
|
+
llama_progress_callback = ctypes.CFUNCTYPE(c_bool, c_float, c_void_p)
|
|
257
260
|
|
|
258
261
|
|
|
259
262
|
# // Input data for llama_decode
|
|
@@ -317,12 +320,9 @@ class llama_batch(Structure):
|
|
|
317
320
|
# LLAMA_KV_OVERRIDE_FLOAT,
|
|
318
321
|
# LLAMA_KV_OVERRIDE_BOOL,
|
|
319
322
|
# };
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
("LLAMA_KV_OVERRIDE_FLOAT", c_int),
|
|
324
|
-
("LLAMA_KV_OVERRIDE_BOOL", c_int),
|
|
325
|
-
]
|
|
323
|
+
LLAMA_KV_OVERRIDE_INT = 0
|
|
324
|
+
LLAMA_KV_OVERRIDE_FLOAT = 1
|
|
325
|
+
LLAMA_KV_OVERRIDE_BOOL = 2
|
|
326
326
|
|
|
327
327
|
# struct llama_model_kv_override {
|
|
328
328
|
# char key[128];
|
|
@@ -333,21 +333,28 @@ class llama_model_kv_override_type(Structure):
|
|
|
333
333
|
# bool bool_value;
|
|
334
334
|
# };
|
|
335
335
|
# };
|
|
336
|
-
class
|
|
336
|
+
class llama_model_kv_override_value(CtypesUnion):
|
|
337
337
|
_fields_ = [
|
|
338
|
-
("
|
|
339
|
-
("tag", llama_model_kv_override_type),
|
|
340
|
-
("int_value", ctypes.c_int64),
|
|
338
|
+
("int_value", c_int64),
|
|
341
339
|
("float_value", c_double),
|
|
342
340
|
("bool_value", c_bool),
|
|
343
341
|
]
|
|
344
342
|
|
|
343
|
+
class llama_model_kv_override(Structure):
|
|
344
|
+
_fields_ = [
|
|
345
|
+
("key", ctypes.c_char * 128),
|
|
346
|
+
("tag", c_int),
|
|
347
|
+
("value", llama_model_kv_override_value),
|
|
348
|
+
]
|
|
349
|
+
|
|
345
350
|
# struct llama_model_params {
|
|
346
351
|
# int32_t n_gpu_layers; // number of layers to store in VRAM
|
|
347
352
|
# int32_t main_gpu; // the GPU that is used for scratch and small tensors
|
|
348
353
|
# const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
|
|
349
354
|
|
|
350
|
-
# //
|
|
355
|
+
# // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
|
|
356
|
+
# // If the provided progress_callback returns true, model loading continues.
|
|
357
|
+
# // If it returns false, model loading is immediately aborted.
|
|
351
358
|
# llama_progress_callback progress_callback;
|
|
352
359
|
# // context pointer passed to the progress callback
|
|
353
360
|
# void * progress_callback_user_data;
|
|
@@ -367,7 +374,7 @@ class llama_model_params(Structure):
|
|
|
367
374
|
n_gpu_layers (int): number of layers to store in VRAM
|
|
368
375
|
main_gpu (int): the GPU that is used for scratch and small tensors
|
|
369
376
|
tensor_split (ctypes.Array[ctypes.c_float]): how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
|
|
370
|
-
progress_callback (llama_progress_callback): called with a progress value between 0 and 1
|
|
377
|
+
progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
|
|
371
378
|
progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
|
|
372
379
|
kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
|
|
373
380
|
vocab_only (bool): only load the vocabulary, no weights
|
|
@@ -733,8 +740,14 @@ def llama_n_ctx(ctx: llama_context_p) -> int:
|
|
|
733
740
|
|
|
734
741
|
|
|
735
742
|
_lib.llama_n_ctx.argtypes = [llama_context_p]
|
|
736
|
-
_lib.llama_n_ctx.restype =
|
|
743
|
+
_lib.llama_n_ctx.restype = c_uint32
|
|
737
744
|
|
|
745
|
+
# LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
|
|
746
|
+
def llama_n_batch(ctx: llama_context_p) -> int:
|
|
747
|
+
return _lib.llama_n_batch(ctx)
|
|
748
|
+
|
|
749
|
+
_lib.llama_n_batch.argtypes = [llama_context_p]
|
|
750
|
+
_lib.llama_n_batch.restype = c_uint32
|
|
738
751
|
|
|
739
752
|
# LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
|
|
740
753
|
def llama_vocab_type(model: llama_model_p) -> int:
|
|
@@ -1041,6 +1054,9 @@ class llama_kv_cache_view(Structure):
|
|
|
1041
1054
|
]
|
|
1042
1055
|
|
|
1043
1056
|
|
|
1057
|
+
llama_kv_cache_view_p = POINTER(llama_kv_cache_view)
|
|
1058
|
+
|
|
1059
|
+
|
|
1044
1060
|
# // Create an empty KV cache view. (use only for debugging purposes)
|
|
1045
1061
|
# LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq);
|
|
1046
1062
|
def llama_kv_cache_view_init(
|
|
@@ -1056,23 +1072,23 @@ _lib.llama_kv_cache_view_init.restype = llama_kv_cache_view
|
|
|
1056
1072
|
|
|
1057
1073
|
# // Free a KV cache view. (use only for debugging purposes)
|
|
1058
1074
|
# LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
|
|
1059
|
-
def llama_kv_cache_view_free(view: llama_kv_cache_view):
|
|
1075
|
+
def llama_kv_cache_view_free(view: "ctypes.pointer[llama_kv_cache_view]"): # type: ignore
|
|
1060
1076
|
"""Free a KV cache view. (use only for debugging purposes)"""
|
|
1061
1077
|
return _lib.llama_kv_cache_view_free(view)
|
|
1062
1078
|
|
|
1063
1079
|
|
|
1064
|
-
_lib.llama_kv_cache_view_free.argtypes = [
|
|
1080
|
+
_lib.llama_kv_cache_view_free.argtypes = [llama_kv_cache_view_p]
|
|
1065
1081
|
_lib.llama_kv_cache_view_free.restype = None
|
|
1066
1082
|
|
|
1067
1083
|
|
|
1068
1084
|
# // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
|
|
1069
1085
|
# LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
|
|
1070
|
-
def llama_kv_cache_view_update(ctx: llama_context_p, view: llama_kv_cache_view):
|
|
1086
|
+
def llama_kv_cache_view_update(ctx: llama_context_p, view: "ctypes.pointer[llama_kv_cache_view]"): # type: ignore
|
|
1071
1087
|
"""Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)"""
|
|
1072
1088
|
return _lib.llama_kv_cache_view_update(ctx, view)
|
|
1073
1089
|
|
|
1074
1090
|
|
|
1075
|
-
_lib.llama_kv_cache_view_update.argtypes = [llama_context_p,
|
|
1091
|
+
_lib.llama_kv_cache_view_update.argtypes = [llama_context_p, llama_kv_cache_view_p]
|
|
1076
1092
|
_lib.llama_kv_cache_view_update.restype = None
|
|
1077
1093
|
|
|
1078
1094
|
|