llama-cpp-python 0.2.23__tar.gz → 0.2.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_cpp_python-0.2.25/.git/FETCH_HEAD +1 -0
- llama_cpp_python-0.2.25/.git/HEAD +1 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/config +1 -1
- llama_cpp_python-0.2.25/.git/index +0 -0
- llama_cpp_python-0.2.25/.git/logs/HEAD +1 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/HEAD +1 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/config +1 -1
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/logs/HEAD +2 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/logs/refs/heads/master +1 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +1 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/objects/pack/pack-53054ebe4aff53f3127333b245dfe71db566f164.idx +0 -0
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/objects/pack/pack-12e6daaf60fd1afc320675169bfed78e447d3fb5.pack → llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/objects/pack/pack-53054ebe4aff53f3127333b245dfe71db566f164.pack +0 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/objects/pack/pack-53054ebe4aff53f3127333b245dfe71db566f164.rev +0 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/packed-refs +2 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/refs/heads/master +1 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/refs/tags/b1691 +1 -0
- llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/shallow +1 -0
- llama_cpp_python-0.2.23/.git/objects/bf/9eb24da4238dafc5bef7eabea39b0fcecf5fac → llama_cpp_python-0.2.25/.git/objects/0e/f132b07175867c07ad06fa22ca6b95eca67b59 +0 -0
- llama_cpp_python-0.2.25/.git/objects/13/454a3a6bea90892a42064c32f7a1a60deb0806 +0 -0
- llama_cpp_python-0.2.25/.git/objects/1c/5efea21fad700ef81acb5682eb71efa64c7453 +0 -0
- llama_cpp_python-0.2.25/.git/objects/2b/14bc6783798c56c71db248c5a834c30fbbce21 +0 -0
- llama_cpp_python-0.2.25/.git/objects/34/6b4631ebd1f4af85e9988d4a528e00edba6375 +0 -0
- llama_cpp_python-0.2.25/.git/objects/37/556bf9c4f83f51e76682316ff4ea3aed58a279 +0 -0
- llama_cpp_python-0.2.25/.git/objects/64/b567b4f3142efeae284deeab2342122d7e62bd +0 -0
- llama_cpp_python-0.2.25/.git/objects/78/8732bd3ba7ed71b0e49fb2dfe42d4ed781c0eb +0 -0
- llama_cpp_python-0.2.25/.git/objects/7c/1dececc933fdfba00ee95b5bed81f447a21333 +0 -0
- llama_cpp_python-0.2.23/.git/objects/3d/07614e35e03d55630abf4e92857441fdcaf91f → llama_cpp_python-0.2.25/.git/objects/80/7b0f57a8a873e58ade0ff0f5b0bcf0ff66b7f9 +0 -0
- llama_cpp_python-0.2.25/.git/objects/85/21e7721390edb971bb04098cba2d50446b3d8f +0 -0
- llama_cpp_python-0.2.25/.git/objects/8e/32d2c0edce725a47b5845463133919cd766a61 +0 -0
- llama_cpp_python-0.2.25/.git/objects/97/fe6e180b574c24eb4f07ef229981a3ac478bb7 +0 -0
- llama_cpp_python-0.2.25/.git/objects/9d/3d3559849603efda6f3c8181684e4d19e0ec79 +0 -0
- llama_cpp_python-0.2.25/.git/objects/af/f397f476fb7773d0e89b0e8913c8b1f97ca3e4 +0 -0
- llama_cpp_python-0.2.25/.git/objects/b5/affaa9d6087f3888dd9eedea209bb214b6e135 +0 -0
- llama_cpp_python-0.2.25/.git/objects/b9/373b7ac641e6e9c8d8cc64951139205d91d8bc +0 -0
- llama_cpp_python-0.2.25/.git/objects/c5/4e4eb5ce2636abd78df46a7616cfe9196a1198 +0 -0
- llama_cpp_python-0.2.25/.git/objects/c8/4fd04498c2fb188ff7c2a59473035fc90eb990 +0 -0
- llama_cpp_python-0.2.23/.git/objects/37/670106e61a8a77daff1cc7852e228b07f99293 → llama_cpp_python-0.2.25/.git/objects/ca/e7ebb7a833dafcd402a96bea3a9574f74f0ed5 +0 -0
- llama_cpp_python-0.2.25/.git/objects/cd/351ba33849dcf6af35b493f7405962fa1625d4 +0 -0
- llama_cpp_python-0.2.25/.git/objects/da/c33b74dddf06fcfc01244044eebb102cfcea37 +0 -0
- llama_cpp_python-0.2.25/.git/objects/ef/1b2c0162e8edd321e2b9c1ce375d96f1f1d048 +0 -0
- llama_cpp_python-0.2.25/.git/objects/f0/827d762e852a21f6406c469300899d5f509b8f +0 -0
- llama_cpp_python-0.2.25/.git/objects/fa/dfc5fb4fe6f5eb6d5d98b62519e374a5202b00 +0 -0
- llama_cpp_python-0.2.25/.git/refs/tags/v0.2.25 +1 -0
- llama_cpp_python-0.2.25/.git/shallow +1 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/CHANGELOG.md +20 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/PKG-INFO +6 -2
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/README.md +4 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docs/server.md +95 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/low_level_api/low_level_api_llama_cpp.py +7 -7
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/llama_cpp/__init__.py +1 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/llama_cpp/llama.py +47 -29
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/llama_cpp/llama_cpp.py +40 -22
- llama_cpp_python-0.2.25/llama_cpp/server/__main__.py +88 -0
- llama_cpp_python-0.2.25/llama_cpp/server/app.py +350 -0
- llama_cpp_python-0.2.25/llama_cpp/server/cli.py +97 -0
- llama_cpp_python-0.2.25/llama_cpp/server/errors.py +210 -0
- llama_cpp_python-0.2.25/llama_cpp/server/model.py +126 -0
- llama_cpp_python-0.2.25/llama_cpp/server/settings.py +167 -0
- llama_cpp_python-0.2.25/llama_cpp/server/types.py +266 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/pyproject.toml +2 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/tests/test_llama.py +25 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.editorconfig +3 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/docker.yml +32 -2
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.gitignore +1 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/CMakeLists.txt +10 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/Makefile +37 -9
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/README.md +35 -10
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/build-info.cpp +1 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/common.cpp +1 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/common.h +2 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/train.cpp +10 -8
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/convert-hf-to-gguf.py +22 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/convert-lora-to-ggml.py +44 -40
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/convert.py +161 -150
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/CMakeLists.txt +1 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/baby-llama/baby-llama.cpp +12 -21
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/benchmark/benchmark-matmult.cpp +7 -7
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +2 -2
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/export-lora/export-lora.cpp +1 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/finetune/finetune.cpp +28 -31
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/gguf/gguf.cpp +1 -2
- llama_cpp_python-0.2.25/vendor/llama.cpp/examples/llama.swiftui/.gitignore +2 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +157 -25
- llama_cpp_python-0.2.25/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift +85 -0
- llama_cpp_python-0.2.25/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift +161 -0
- llama_cpp_python-0.2.25/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift +122 -0
- llama_cpp_python-0.2.25/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +483 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/clip.cpp +4 -10
- llama_cpp_python-0.2.25/vendor/llama.cpp/examples/lookup/CMakeLists.txt +5 -0
- llama_cpp_python-0.2.25/vendor/llama.cpp/examples/lookup/README.md +13 -0
- llama_cpp_python-0.2.25/vendor/llama.cpp/examples/lookup/lookup.cpp +230 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/public/completion.js +2 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/public/index.html +6 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/server.cpp +88 -30
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +5 -9
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-alloc.c +13 -5
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-backend-impl.h +12 -8
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-backend.c +75 -5
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-backend.h +7 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-cuda.cu +303 -169
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-metal.h +3 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-metal.m +190 -44
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-metal.metal +11 -2
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-quants.c +2 -2
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml.c +326 -150
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml.h +37 -16
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/README.md +1 -1
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/constants.py +13 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/tensor_mapping.py +8 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/vocab.py +4 -2
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/llama.cpp +946 -800
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/llama.h +8 -3
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/requirements.txt +2 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-backend-ops.cpp +11 -8
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-grad0.cpp +5 -5
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-quantize-perf.cpp +5 -5
- llama_cpp_python-0.2.23/.git/FETCH_HEAD +0 -1
- llama_cpp_python-0.2.23/.git/HEAD +0 -1
- llama_cpp_python-0.2.23/.git/index +0 -0
- llama_cpp_python-0.2.23/.git/logs/HEAD +0 -1
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/HEAD +0 -1
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/logs/HEAD +0 -2
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/logs/refs/heads/master +0 -1
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +0 -1
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/objects/pack/pack-12e6daaf60fd1afc320675169bfed78e447d3fb5.idx +0 -0
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/objects/pack/pack-12e6daaf60fd1afc320675169bfed78e447d3fb5.rev +0 -0
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/packed-refs +0 -2
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/refs/heads/master +0 -1
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/refs/tags/b1634 +0 -1
- llama_cpp_python-0.2.23/.git/modules/vendor/llama.cpp/shallow +0 -1
- llama_cpp_python-0.2.23/.git/objects/0a/acdf9e36ea008ab097c18f24fed6677ff9eac5 +0 -0
- llama_cpp_python-0.2.23/.git/objects/19/11b26d9581b239d8d9d2765ee917aee86aa4a0 +0 -0
- llama_cpp_python-0.2.23/.git/objects/1c/a637adc1359ea93448bdbe4038e4dc7a08d04e +0 -0
- llama_cpp_python-0.2.23/.git/objects/29/2378d50150e0ba5c2d5e5f8f56d6c5e7b5b2de +0 -0
- llama_cpp_python-0.2.23/.git/objects/31/332c40d90f5319fd4b040322084e41c80da60e +0 -0
- llama_cpp_python-0.2.23/.git/objects/3d/8bbe142e5dc92f6698fe709c18aa32365e37e3 +0 -0
- llama_cpp_python-0.2.23/.git/objects/45/fc5a8579674a501e1772722b3e1e2c775dd01f +0 -0
- llama_cpp_python-0.2.23/.git/objects/46/9eba2dece02dc7c49f1173fed2a7aa6de25353 +0 -0
- llama_cpp_python-0.2.23/.git/objects/4e/1e5622839e64c024cfbfb9beb903d8ed75849f +0 -0
- llama_cpp_python-0.2.23/.git/objects/6c/10225819e88df6faf38ed409b00e6ae9a4d139 +0 -0
- llama_cpp_python-0.2.23/.git/objects/71/38cf4031e1992dd034704c02d715d42af94d4a +0 -0
- llama_cpp_python-0.2.23/.git/objects/82/ec1985731d5578ba90a3dc3071cbd510829bad +0 -0
- llama_cpp_python-0.2.23/.git/objects/9d/7224d2cedd65921f2968c6c278a8f257c9f160 +0 -0
- llama_cpp_python-0.2.23/.git/objects/c9/8148ec0afecc6ac971d68b13e81f7137a1ebd2 +0 -0
- llama_cpp_python-0.2.23/.git/objects/cb/ce061ffdd6b148fada79cf132683e0dbf11fb6 +0 -0
- llama_cpp_python-0.2.23/.git/objects/e2/273074877e484b127c372a80fde060f1e254ab +0 -0
- llama_cpp_python-0.2.23/.git/objects/ec/499d966b00470947837dcd9d200c47531c0861 +0 -0
- llama_cpp_python-0.2.23/.git/refs/tags/v0.2.23 +0 -1
- llama_cpp_python-0.2.23/.git/shallow +0 -1
- llama_cpp_python-0.2.23/llama_cpp/server/__main__.py +0 -101
- llama_cpp_python-0.2.23/llama_cpp/server/app.py +0 -910
- llama_cpp_python-0.2.23/vendor/llama.cpp/examples/llama.swiftui/.gitignore +0 -1
- llama_cpp_python-0.2.23/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift +0 -45
- llama_cpp_python-0.2.23/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift +0 -42
- llama_cpp_python-0.2.23/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +0 -481
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.dockerignore +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/description +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/info/exclude +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/description +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/info/exclude +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/refs/remotes/origin/HEAD +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/03/7f96a2dde3c50a29a26eaacf577f4ecac7c027 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/04/5856ea2ffe697ec33db1c1c989bd45cde5bb3d +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/04/7bc14424303575f73af90611fec827334f54e8 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/0e/2a907c0e40de1a432ee0cd2e3d01c634df1a37 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/11/2a0f7872fa9244bf38729a2722dc5c08dec20c +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/11/b6d5c26e3f7157658952b8ec353e985d522fac +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/13/c5b6b0df5f67e80cbe584909b83777901265a1 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/17/1f357f53b0de535157a2c9b4de04582784ec97 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/19/9bd4ffbf88c68c98b52c05f388dfa92716f6b7 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/23/e37d4d40e5ec0bfd85b5e928834d58e2cf0da6 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/24/04228325d88c59b819acaedb20b15635f75df9 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/27/2e4767b47397d1529e6a2d01298144d9d746ba +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/27/a6b1e5042318f85dc99c9f24f4dd6fc472d242 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/29/03e0146d304bcacbfabfe71f171a2edc03043e +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/2c/c6fb02dcb3226c1ecf0fbed6453eb3acadeaa2 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/2e/24903c21db75253911a70039ac5622672db813 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/39/697bfc2538bd4558018d0d721d6d6028c4bb56 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/3a/1d7180d508818fe957923e00dcd8950938632d +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/3a/6457dcdfd47e764654bacae0ba8347976b645a +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/41/cc68ea2402cf682807649d841e7c0f4175db01 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/44/b6d4a35d3caf0c65aef0a1c77ff0ab4077d405 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/47/4503fdfc554d8caabee4f321a80427f8c7d696 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/4b/3189dd1a54be3bc416786ddf184dd047dc4b20 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/51/f357200f8b998031f4be924e11ed2ae4bf3fea +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/55/d08db5f514fb6847938e3d6489b99f737ba6e3 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/56/2410fe1a4c4093d62e1705933eaf66c6d99a2c +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/5b/51e98ce432974ff031367f8937babe755e3d73 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/5d/f12aaf53a0e85f55e1aa0e5167bc831ab32783 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/5f/350ffe99abe7297b2839f36fd0eaeb98887805 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/60/bc7aef42aac0409cfdca666ad2ff6f516d7b5b +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/61/027ef99725c50b0891fdbf0bf263a33abe648f +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/68/7316b327ca038d26a338b3070a7d4698322515 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/74/7c6130e3cb1479d20e2013b1dd3db3379c2266 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/76/e26fbd106895fba52861f8ac1e11cc6ee2a307 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/79/5dad726ae91f330c56e49bc188080b5b3b5ba9 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/79/a9e67a1aee09c6d182f240ba5eef32feabcbce +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/7a/7b899ec7dd48d192cba14ac9c8e9df4233f7fb +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/7d/5498f9d2c49c1f731b47845d845f6c5e16a3d9 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/7d/6c970483161eaf43cfa9d50010c071d4953053 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/7e/df0975dc12ccc95ad14de085f07efe6d65c620 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/7e/e8f748eab47180cea09c0ad8e75c3b991b4af4 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/82/e5c4487e57d6d59c901a73bdd2a9bc172fee7c +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/8b/d03f88a1895cbf3ef249e13df79ee0efda779d +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/90/fdf9b2be7bbfb62c2b1463d6c95cc2d26ebb5c +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/91/abb11fdf507883caeeb2d2958e1c65fb6cbdc1 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/99/32d6130f5552a9b85c8b15b4ac6bc26b1068ce +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/9d/0ec2f705618e591cfa8d6512cb9a96b3da75f1 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/a9/e51cdc1672134ec9af66c9eccf09f6da4ceccd +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/ac/82b8fbb81087ec9b3a72d9e377102a31b28d16 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/ae/c023cd61b3bb83bac968fffdd6aa0810af7c3d +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/b1/f90b91594f496ad9f27b1a68584984f4b523d2 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/b9/1632f5bc787c1404600c894a6a4126359747d8 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/bb/cbbe7d61558adde3cbfd0c7a63a67c27ed6d30 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/c8/5e73d2b657bb05ed99309615d67bac93d9f86e +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/ca/ebbb67fdb02a0a8897d4e4826ea046a9931f6f +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/cc/6a3a7252ea6e698614f0629d4bc040ab6ca717 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/d3/329eec3bac6ce7e54c76b77ac9bf99fab0fe3f +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/e2/13518b95011cb6ee783986624c3b6de8659f81 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/e4/1f375774e6945e445bfb179502b128fe22dda7 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/e9/30609ff5c479a1d5e12a8f3993582d421a6326 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/e9/48f8deb150039c6853e14537a8dd0cc9002b72 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/eb/9a2cfa9167df02f136502af79738c71363abfd +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/f1/b8e9d154231932c4b7b9b59611626764e68632 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/f2/0813765a70679e8a063871c9ef75d75c65ccb6 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/fc/25ff5160028dee3570249abc40cd57780bcca9 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/fc/ef8cd800ee8a265b146748d178cb56b5632bf3 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/fd/64c09b37947c97e58903ce570785c657d56722 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/fe/b0ed68d94eac48b844fd587ddfb808649716a1 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.git/objects/ff/3e950cd1110fe552912cea4c268c4023d2b737 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.github/dependabot.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.github/workflows/build-and-release.yaml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.github/workflows/build-docker.yaml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.github/workflows/publish-to-test.yaml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.github/workflows/publish.yaml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.github/workflows/test-pypi.yaml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.github/workflows/test.yaml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.gitignore +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.gitmodules +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/.readthedocs.yaml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/LICENSE.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/Makefile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docker/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docker/cuda_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docker/open_llama/Dockerfile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docker/open_llama/build.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docker/open_llama/hug_model.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docker/open_llama/start.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docker/open_llama/start_server.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docker/openblas_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docker/simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docker/simple/run.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docs/api-reference.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docs/changelog.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docs/index.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docs/install/macos.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/docs/requirements.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/high_level_api/fastapi_server.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/high_level_api/high_level_api_embedding.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/high_level_api/high_level_api_inference.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/high_level_api/high_level_api_streaming.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/high_level_api/langchain_custom_llm.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/low_level_api/Chat.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/low_level_api/Miku.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/low_level_api/ReasonAct.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/low_level_api/common.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/low_level_api/low_level_api_chat_cpp.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/low_level_api/quantize.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/low_level_api/readme/low_level_api_llama_cpp.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/low_level_api/util.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/notebooks/Batching.ipynb +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/notebooks/Clients.ipynb +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/notebooks/Functions.ipynb +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/notebooks/Guidance.ipynb +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/notebooks/Multimodal.ipynb +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/examples/notebooks/PerformanceTuning.ipynb +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/llama_cpp/_utils.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/llama_cpp/llama_chat_format.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/llama_cpp/llama_grammar.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/llama_cpp/llama_types.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/llama_cpp/llava_cpp.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/llama_cpp/py.typed +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/llama_cpp/server/__init__.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/mkdocs.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/tests/test_grammar.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.clang-tidy +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/cloud-v-pipeline +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/full-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/full-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/main-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/main-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/tools.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.dockerignore +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.ecrc +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.flake8 +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.git +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/enhancement.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/build.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/code-coverage.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/editorconfig.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/gguf-publish.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/python-lint.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/tidy-post.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/tidy-review.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/zig-build.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.pre-commit-config.yaml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/LICENSE +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/Package.swift +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/SHA256SUMS +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/build.zig +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ci/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ci/run.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/cmake/FindSIMD.cmake +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/codecov.yml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/base64.hpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/build-info.cpp.in +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/console.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/console.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/grammar-parser.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/log.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/sampling.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/sampling.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/stb_image.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/train.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/convert-llama-ggml-to-gguf.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/convert-persimmon-to-gguf.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/docs/BLIS.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/docs/llama-star/idea-arch.key +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/docs/llama-star/idea-arch.pdf +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/docs/token_generation_performance_tips.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/Miku.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/alpaca.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched/batched.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched-bench/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched-bench/batched-bench.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched.swift/.gitignore +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched.swift/Makefile +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched.swift/Package.swift +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched.swift/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched.swift/Sources/main.swift +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/beam-search/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/beam-search/beam-search.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/benchmark/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/chat-13B.bat +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/chat-13B.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/chat-persistent.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/chat-vicuna.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/chat.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/embedding/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/embedding/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/embedding/embedding.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/export-lora/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/export-lora/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/finetune/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/finetune/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/finetune/convert-finetune-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/finetune/finetune.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/gpt4all.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/infill/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/infill/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/infill/infill.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/jeopardy/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/jeopardy/graph.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/jeopardy/jeopardy.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/jeopardy/qasheet.csv +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/jeopardy/questions.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/json-schema-to-grammar.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama-bench/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama-bench/llama-bench.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/bridging-header.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AccentColor.colorset/Contents.json +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Preview Content/Preview Assets.xcassets/Contents.json +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.vim +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama2-13b.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama2.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/clip.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/convert-image-encoder-to-gguf.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/llava-cli.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/llava-surgery.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/llava.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/llava.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llm.vim +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/lookahead/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/lookahead/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/lookahead/lookahead.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main/main.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main-cmake-pkg/.gitignore +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main-cmake-pkg/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/make-ggml.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/metal/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/metal/metal.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/parallel/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/parallel/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/parallel/parallel.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/perplexity/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/perplexity/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/perplexity/perplexity.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/quantize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/quantize/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/quantize/quantize.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/quantize-stats/quantize-stats.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/reason-act.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/api_like_OAI.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/chat-llama2.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/chat.mjs +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/chat.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/completion.js.hpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/deps.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/httplib.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/index.html.hpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/index.js.hpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/json-schema-to-grammar.mjs.hpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/json.hpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/public/index.js +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/public/json-schema-to-grammar.mjs +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server-llama2-13B.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/simple/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/simple/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/simple/simple.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/speculative/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/speculative/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/speculative/speculative.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/tokenize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/tokenize/tokenize.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/train-text-from-scratch/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/flake.lock +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/flake.nix +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-alloc.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-cuda.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-impl.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-mpi.c +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-mpi.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-opencl.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-opencl.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-quants.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/LICENSE +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/examples/writer.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/__init__.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/gguf.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/gguf_reader.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/gguf_writer.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/py.typed +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/pyproject.toml +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/scripts/__init__.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/scripts/gguf-convert-endian.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/scripts/gguf-dump.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/scripts/gguf-set-metadata.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/tests/test_gguf.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/README.md +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/arithmetic.gbnf +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/c.gbnf +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/chess.gbnf +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/japanese.gbnf +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/json.gbnf +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/json_arr.gbnf +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/list.gbnf +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/media/llama-leader.jpeg +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/media/llama0-banner.png +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/media/llama0-logo.png +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/media/llama1-banner.png +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/media/llama1-logo.png +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/mypy.ini +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/pocs/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/pocs/vdot/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/pocs/vdot/q8dot.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/pocs/vdot/vdot.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/LLM-questions.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/assistant.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat-with-baichuan.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat-with-qwen.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/dan-modified.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/dan.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/mnemonics.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/parallel-questions.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/reason-act.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/requirements-hf-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/run_with_preset.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/LlamaConfig.cmake.in +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/build-info.cmake +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/build-info.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/convert-gg.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/get-flags.mk +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/get-wikitext-2.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/qnt-all.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/run-all-perf.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/run-all-ppl.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/server-llm.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/sync-ggml.sh +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/verify-checksum-models.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/spm-headers/ggml.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/spm-headers/llama.h +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-c.c +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-double-float.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-llama-grammar.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-opt.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-quantize-fns.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-rope.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-sampling.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-0-llama.py +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-1-llama.cpp +0 -0
- {llama_cpp_python-0.2.23 → llama_cpp_python-0.2.25}/vendor/llama.cpp/unicode.h +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
37556bf9c4f83f51e76682316ff4ea3aed58a279 '37556bf9c4f83f51e76682316ff4ea3aed58a279' of https://github.com/abetlen/llama-cpp-python
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
37556bf9c4f83f51e76682316ff4ea3aed58a279
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
[gc]
|
|
10
10
|
auto = 0
|
|
11
11
|
[http "https://github.com/"]
|
|
12
|
-
extraheader = AUTHORIZATION: basic
|
|
12
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzXzMzOEY0bUxVSzA1UlkwTkhqNDgzaXlqZW8xRVBxMDFBQ1JITQ==
|
|
13
13
|
[submodule "vendor/llama.cpp"]
|
|
14
14
|
active = true
|
|
15
15
|
url = https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 37556bf9c4f83f51e76682316ff4ea3aed58a279 runner <runner@fv-az1149-712.p0yfcspwqgdenibuqhnf5ysfnc.dx.internal.cloudapp.net> 1703276562 +0000 checkout: moving from master to refs/tags/v0.2.25
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
7082d24cec35e9ce9147535a2224dfc67ee0a78c
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
[gc]
|
|
14
14
|
auto = 0
|
|
15
15
|
[http "https://github.com/"]
|
|
16
|
-
extraheader = AUTHORIZATION: basic
|
|
16
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzXzMzOEY0bUxVSzA1UlkwTkhqNDgzaXlqZW8xRVBxMDFBQ1JITQ==
|
|
17
17
|
[url "https://github.com/"]
|
|
18
18
|
insteadOf = git@github.com:
|
|
19
19
|
insteadOf = org-6826477@github.com:
|
|
Binary file
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 7082d24cec35e9ce9147535a2224dfc67ee0a78c runner <runner@fv-az1149-712.p0yfcspwqgdenibuqhnf5ysfnc.dx.internal.cloudapp.net> 1703276563 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
2
|
+
7082d24cec35e9ce9147535a2224dfc67ee0a78c 7082d24cec35e9ce9147535a2224dfc67ee0a78c runner <runner@fv-az1149-712.p0yfcspwqgdenibuqhnf5ysfnc.dx.internal.cloudapp.net> 1703276563 +0000 checkout: moving from master to 7082d24cec35e9ce9147535a2224dfc67ee0a78c
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 7082d24cec35e9ce9147535a2224dfc67ee0a78c runner <runner@fv-az1149-712.p0yfcspwqgdenibuqhnf5ysfnc.dx.internal.cloudapp.net> 1703276563 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 7082d24cec35e9ce9147535a2224dfc67ee0a78c runner <runner@fv-az1149-712.p0yfcspwqgdenibuqhnf5ysfnc.dx.internal.cloudapp.net> 1703276563 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
7082d24cec35e9ce9147535a2224dfc67ee0a78c
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
7082d24cec35e9ce9147535a2224dfc67ee0a78c
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
7082d24cec35e9ce9147535a2224dfc67ee0a78c
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
37556bf9c4f83f51e76682316ff4ea3aed58a279
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
37556bf9c4f83f51e76682316ff4ea3aed58a279
|
|
@@ -7,6 +7,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.25]
|
|
11
|
+
|
|
12
|
+
- feat(server): Multi model support by @D4ve-R in #931
|
|
13
|
+
- feat(server): Support none defaulting to infinity for completions by @swg in #111
|
|
14
|
+
- feat(server): Implement openai api compatible authentication by @docmeth2 in #1010
|
|
15
|
+
- fix: text_offset of multi-token characters by @twaka in #1037
|
|
16
|
+
- fix: ctypes bindings for kv override by @phiharri in #1011
|
|
17
|
+
- fix: ctypes definitions of llama_kv_cache_view_update and llama_kv_cache_view_free. by @e-c-d in #1028
|
|
18
|
+
|
|
19
|
+
## [0.2.24]
|
|
20
|
+
|
|
21
|
+
- feat: Update llama.cpp to ggerganov/llama.cpp@0e18b2e7d0b5c0a509ea40098def234b8d4a938a
|
|
22
|
+
- feat: Add offload_kqv option to llama and server by @abetlen in 095c65000642a3cf73055d7428232fb18b73c6f3
|
|
23
|
+
- feat: n_ctx=0 now uses the n_ctx_train of the model by @DanieleMorotti in #1015
|
|
24
|
+
- feat: logits_to_logprobs supports both 2-D and 3-D logits arrays by @kddubey in #1002
|
|
25
|
+
- fix: Remove f16_kv, add offload_kqv fields in low level and llama apis by @brandonrobertz in #1019
|
|
26
|
+
- perf: Don't convert logprobs arrays to lists by @kddubey in #1021
|
|
27
|
+
- docs: Fix README.md functionary demo typo by @evelynmitchell in #996
|
|
28
|
+
- examples: Update low_level_api_llama_cpp.py to match current API by @jsoma in #1023
|
|
29
|
+
|
|
10
30
|
## [0.2.23]
|
|
11
31
|
|
|
12
32
|
- Update llama.cpp to ggerganov/llama.cpp@948ff137ec37f1ec74c02905917fa0afc9b97514
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: llama_cpp_python
|
|
3
|
-
Version: 0.2.23
|
|
3
|
+
Version: 0.2.25
|
|
4
4
|
Summary: Python bindings for the llama.cpp library
|
|
5
5
|
Author-Email: Andrei Betlen <abetlen@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -25,6 +25,7 @@ Requires-Dist: sse-starlette>=1.6.1; extra == "server"
|
|
|
25
25
|
Requires-Dist: starlette-context<0.4,>=0.3.6; extra == "server"
|
|
26
26
|
Requires-Dist: pytest>=7.4.0; extra == "test"
|
|
27
27
|
Requires-Dist: httpx>=0.24.1; extra == "test"
|
|
28
|
+
Requires-Dist: scipy>=1.10; extra == "test"
|
|
28
29
|
Requires-Dist: black>=23.3.0; extra == "dev"
|
|
29
30
|
Requires-Dist: twine>=4.0.2; extra == "dev"
|
|
30
31
|
Requires-Dist: mkdocs>=1.4.3; extra == "dev"
|
|
@@ -59,11 +60,13 @@ This package provides:
|
|
|
59
60
|
- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
|
|
60
61
|
- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
|
|
61
62
|
- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
|
|
63
|
+
- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
|
|
62
64
|
|
|
63
65
|
Documentation is available at [https://llama-cpp-python.readthedocs.io/en/latest](https://llama-cpp-python.readthedocs.io/en/latest).
|
|
64
66
|
|
|
65
67
|
|
|
66
68
|
|
|
69
|
+
|
|
67
70
|
## Installation
|
|
68
71
|
|
|
69
72
|
`llama-cpp-python` can be installed directly from PyPI as a source distribution by running:
|
|
@@ -261,7 +264,7 @@ The gguf-converted files for this model can be found here: [functionary-7b-v1](h
|
|
|
261
264
|
"function": {
|
|
262
265
|
"name": "UserDetail",
|
|
263
266
|
"parameters": {
|
|
264
|
-
"type": "object"
|
|
267
|
+
"type": "object",
|
|
265
268
|
"title": "UserDetail",
|
|
266
269
|
"properties": {
|
|
267
270
|
"name": {
|
|
@@ -373,6 +376,7 @@ For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_
|
|
|
373
376
|
- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
|
|
374
377
|
- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
|
|
375
378
|
- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
|
|
379
|
+
- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
|
|
376
380
|
|
|
377
381
|
## Docker image
|
|
378
382
|
|
|
@@ -18,11 +18,13 @@ This package provides:
|
|
|
18
18
|
- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
|
|
19
19
|
- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
|
|
20
20
|
- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
|
|
21
|
+
- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
|
|
21
22
|
|
|
22
23
|
Documentation is available at [https://llama-cpp-python.readthedocs.io/en/latest](https://llama-cpp-python.readthedocs.io/en/latest).
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
|
|
27
|
+
|
|
26
28
|
## Installation
|
|
27
29
|
|
|
28
30
|
`llama-cpp-python` can be installed directly from PyPI as a source distribution by running:
|
|
@@ -220,7 +222,7 @@ The gguf-converted files for this model can be found here: [functionary-7b-v1](h
|
|
|
220
222
|
"function": {
|
|
221
223
|
"name": "UserDetail",
|
|
222
224
|
"parameters": {
|
|
223
|
-
"type": "object"
|
|
225
|
+
"type": "object",
|
|
224
226
|
"title": "UserDetail",
|
|
225
227
|
"properties": {
|
|
226
228
|
"name": {
|
|
@@ -332,6 +334,7 @@ For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_
|
|
|
332
334
|
- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
|
|
333
335
|
- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
|
|
334
336
|
- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
|
|
337
|
+
- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
|
|
335
338
|
|
|
336
339
|
## Docker image
|
|
337
340
|
|
|
@@ -32,6 +32,12 @@ python3 -m llama_cpp.server --help
|
|
|
32
32
|
|
|
33
33
|
NOTE: All server options are also available as environment variables. For example, `--model` can be set by setting the `MODEL` environment variable.
|
|
34
34
|
|
|
35
|
+
Check out the server options reference below for more information on the available options.
|
|
36
|
+
CLI arguments and environment variables are available for all of the fields defined in [`ServerSettings`](#llama_cpp.server.settings.ServerSettings) and [`ModelSettings`](#llama_cpp.server.settings.ModelSettings)
|
|
37
|
+
|
|
38
|
+
Additionally, the server supports configuration via a config file; check out the [configuration section](#configuration-and-multi-model-support) for more information and examples.
|
|
39
|
+
|
|
40
|
+
|
|
35
41
|
## Guides
|
|
36
42
|
|
|
37
43
|
### Code Completion
|
|
@@ -121,4 +127,92 @@ response = client.chat.completions.create(
|
|
|
121
127
|
],
|
|
122
128
|
)
|
|
123
129
|
print(response)
|
|
124
|
-
```
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Configuration and Multi-Model Support
|
|
133
|
+
|
|
134
|
+
The server supports configuration via a JSON config file that can be passed using the `--config_file` parameter or the `CONFIG_FILE` environment variable.
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
python3 -m llama_cpp.server --config_file <config_file>
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Config files support all of the server and model options supported by the CLI and environment variables; however, instead of only a single model, the config file can specify multiple models.
|
|
141
|
+
|
|
142
|
+
The server supports routing requests to multiple models based on the `model` parameter in the request which matches against the `model_alias` in the config file.
|
|
143
|
+
|
|
144
|
+
At the moment only a single model is loaded into memory at a time; the server will automatically load and unload models as needed.
|
|
145
|
+
|
|
146
|
+
```json
|
|
147
|
+
{
|
|
148
|
+
"host": "0.0.0.0",
|
|
149
|
+
"port": 8080,
|
|
150
|
+
"models": [
|
|
151
|
+
{
|
|
152
|
+
"model": "models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf",
|
|
153
|
+
"model_alias": "gpt-3.5-turbo",
|
|
154
|
+
"chat_format": "chatml",
|
|
155
|
+
"n_gpu_layers": -1,
|
|
156
|
+
"offload_kqv": true,
|
|
157
|
+
"n_threads": 12,
|
|
158
|
+
"n_batch": 512,
|
|
159
|
+
"n_ctx": 2048
|
|
160
|
+
},
|
|
161
|
+
{
|
|
162
|
+
"model": "models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf",
|
|
163
|
+
"model_alias": "gpt-4",
|
|
164
|
+
"chat_format": "chatml",
|
|
165
|
+
"n_gpu_layers": -1,
|
|
166
|
+
"offload_kqv": true,
|
|
167
|
+
"n_threads": 12,
|
|
168
|
+
"n_batch": 512,
|
|
169
|
+
"n_ctx": 2048
|
|
170
|
+
},
|
|
171
|
+
{
|
|
172
|
+
"model": "models/ggml_llava-v1.5-7b/ggml-model-q4_k.gguf",
|
|
173
|
+
"model_alias": "gpt-4-vision-preview",
|
|
174
|
+
"chat_format": "llava-1-5",
|
|
175
|
+
"clip_model_path": "models/ggml_llava-v1.5-7b/mmproj-model-f16.gguf",
|
|
176
|
+
"n_gpu_layers": -1,
|
|
177
|
+
"offload_kqv": true,
|
|
178
|
+
"n_threads": 12,
|
|
179
|
+
"n_batch": 512,
|
|
180
|
+
"n_ctx": 2048
|
|
181
|
+
},
|
|
182
|
+
{
|
|
183
|
+
"model": "models/mistral-7b-v0.1-GGUF/ggml-model-Q4_K.gguf",
|
|
184
|
+
"model_alias": "text-davinci-003",
|
|
185
|
+
"n_gpu_layers": -1,
|
|
186
|
+
"offload_kqv": true,
|
|
187
|
+
"n_threads": 12,
|
|
188
|
+
"n_batch": 512,
|
|
189
|
+
"n_ctx": 2048
|
|
190
|
+
},
|
|
191
|
+
{
|
|
192
|
+
"model": "models/replit-code-v1_5-3b-GGUF/replit-code-v1_5-3b.Q4_0.gguf",
|
|
193
|
+
"model_alias": "copilot-codex",
|
|
194
|
+
"n_gpu_layers": -1,
|
|
195
|
+
"offload_kqv": true,
|
|
196
|
+
"n_threads": 12,
|
|
197
|
+
"n_batch": 1024,
|
|
198
|
+
"n_ctx": 9216
|
|
199
|
+
}
|
|
200
|
+
]
|
|
201
|
+
}
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
The config file format is defined by the [`ConfigFileSettings`](#llama_cpp.server.settings.ConfigFileSettings) class.
|
|
205
|
+
|
|
206
|
+
## Server Options Reference
|
|
207
|
+
|
|
208
|
+
::: llama_cpp.server.settings.ConfigFileSettings
|
|
209
|
+
options:
|
|
210
|
+
show_if_no_docstring: true
|
|
211
|
+
|
|
212
|
+
::: llama_cpp.server.settings.ServerSettings
|
|
213
|
+
options:
|
|
214
|
+
show_if_no_docstring: true
|
|
215
|
+
|
|
216
|
+
::: llama_cpp.server.settings.ModelSettings
|
|
217
|
+
options:
|
|
218
|
+
show_if_no_docstring: true
|
|
@@ -73,7 +73,7 @@ while remaining_tokens > 0:
|
|
|
73
73
|
embd = []
|
|
74
74
|
if len(embd_inp) <= input_consumed:
|
|
75
75
|
logits = llama_cpp.llama_get_logits(ctx)
|
|
76
|
-
n_vocab = llama_cpp.llama_n_vocab(
|
|
76
|
+
n_vocab = llama_cpp.llama_n_vocab(model)
|
|
77
77
|
|
|
78
78
|
_arr = (llama_cpp.llama_token_data * n_vocab)(*[
|
|
79
79
|
llama_cpp.llama_token_data(token_id, logits[token_id], 0.0)
|
|
@@ -83,12 +83,12 @@ while remaining_tokens > 0:
|
|
|
83
83
|
llama_cpp.llama_token_data_array(_arr, len(_arr), False))
|
|
84
84
|
|
|
85
85
|
_arr = (llama_cpp.c_int * len(last_n_tokens_data))(*last_n_tokens_data)
|
|
86
|
-
llama_cpp.
|
|
86
|
+
llama_cpp.llama_sample_repetition_penalties(ctx, candidates_p,
|
|
87
87
|
_arr,
|
|
88
|
-
last_n_repeat,
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
88
|
+
penalty_last_n=last_n_repeat,
|
|
89
|
+
penalty_repeat=repeat_penalty,
|
|
90
|
+
penalty_freq=frequency_penalty,
|
|
91
|
+
penalty_present=presence_penalty)
|
|
92
92
|
|
|
93
93
|
llama_cpp.llama_sample_top_k(ctx, candidates_p, k=40, min_keep=1)
|
|
94
94
|
llama_cpp.llama_sample_top_p(ctx, candidates_p, p=0.8, min_keep=1)
|
|
@@ -126,4 +126,4 @@ print()
|
|
|
126
126
|
|
|
127
127
|
llama_cpp.llama_print_timings(ctx)
|
|
128
128
|
|
|
129
|
-
llama_cpp.llama_free(ctx)
|
|
129
|
+
llama_cpp.llama_free(ctx)
|
|
@@ -2,7 +2,6 @@ import os
|
|
|
2
2
|
import sys
|
|
3
3
|
import uuid
|
|
4
4
|
import time
|
|
5
|
-
import math
|
|
6
5
|
import multiprocessing
|
|
7
6
|
from abc import ABC, abstractmethod
|
|
8
7
|
from typing import (
|
|
@@ -751,9 +750,9 @@ class Llama:
|
|
|
751
750
|
yarn_beta_slow: float = 1.0,
|
|
752
751
|
yarn_orig_ctx: int = 0,
|
|
753
752
|
mul_mat_q: bool = True,
|
|
754
|
-
f16_kv: bool = True,
|
|
755
753
|
logits_all: bool = False,
|
|
756
754
|
embedding: bool = False,
|
|
755
|
+
offload_kqv: bool = False,
|
|
757
756
|
# Sampling Params
|
|
758
757
|
last_n_tokens_size: int = 64,
|
|
759
758
|
# LoRA Params
|
|
@@ -771,7 +770,7 @@ class Llama:
|
|
|
771
770
|
**kwargs, # type: ignore
|
|
772
771
|
):
|
|
773
772
|
"""Load a llama.cpp model from `model_path`.
|
|
774
|
-
|
|
773
|
+
|
|
775
774
|
Examples:
|
|
776
775
|
Basic usage
|
|
777
776
|
|
|
@@ -817,9 +816,9 @@ class Llama:
|
|
|
817
816
|
yarn_beta_fast: YaRN low correction dim
|
|
818
817
|
yarn_beta_slow: YaRN high correction dim
|
|
819
818
|
yarn_orig_ctx: YaRN original context size
|
|
820
|
-
f16_kv: Use fp16 for KV cache, fp32 otherwise
|
|
821
819
|
logits_all: Return logits for all tokens, not just the last token. Must be True for completion to return logprobs.
|
|
822
820
|
embedding: Embedding mode only.
|
|
821
|
+
offload_kqv: Offload K, Q, V to GPU.
|
|
823
822
|
last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
|
|
824
823
|
lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
|
|
825
824
|
lora_path: Path to a LoRA file to apply to the model.
|
|
@@ -904,9 +903,9 @@ class Llama:
|
|
|
904
903
|
)
|
|
905
904
|
self.context_params.yarn_orig_ctx = yarn_orig_ctx if yarn_orig_ctx != 0 else 0
|
|
906
905
|
self.context_params.mul_mat_q = mul_mat_q
|
|
907
|
-
# self.context_params.f16_kv = f16_kv
|
|
908
906
|
self.context_params.logits_all = logits_all
|
|
909
907
|
self.context_params.embedding = embedding
|
|
908
|
+
self.context_params.offload_kqv = offload_kqv
|
|
910
909
|
|
|
911
910
|
# Sampling Params
|
|
912
911
|
self.last_n_tokens_size = last_n_tokens_size
|
|
@@ -923,6 +922,12 @@ class Llama:
|
|
|
923
922
|
self._model = _LlamaModel(
|
|
924
923
|
path_model=self.model_path, params=self.model_params, verbose=self.verbose
|
|
925
924
|
)
|
|
925
|
+
# Set the default value for the context and correct the batch
|
|
926
|
+
if n_ctx == 0:
|
|
927
|
+
n_ctx = self._model.n_ctx_train()
|
|
928
|
+
self.n_batch = min(n_ctx, n_batch)
|
|
929
|
+
self.context_params.n_ctx = self._model.n_ctx_train()
|
|
930
|
+
self.context_params.n_batch = self.n_batch
|
|
926
931
|
|
|
927
932
|
self._ctx = _LlamaContext(
|
|
928
933
|
model=self._model,
|
|
@@ -1546,11 +1551,13 @@ class Llama:
|
|
|
1546
1551
|
"utf-8", errors="ignore"
|
|
1547
1552
|
)
|
|
1548
1553
|
text_offset = len(prompt) + len(
|
|
1549
|
-
self.detokenize(completion_tokens[:returned_tokens])
|
|
1554
|
+
self.detokenize(completion_tokens[:returned_tokens]).decode(
|
|
1555
|
+
"utf-8", errors="ignore"
|
|
1556
|
+
)
|
|
1550
1557
|
)
|
|
1551
1558
|
token_offset = len(prompt_tokens) + returned_tokens
|
|
1552
|
-
logits = self._scores[token_offset - 1, :]
|
|
1553
|
-
current_logprobs = Llama.logits_to_logprobs(logits)
|
|
1559
|
+
logits = self._scores[token_offset - 1, :]
|
|
1560
|
+
current_logprobs = Llama.logits_to_logprobs(logits).tolist()
|
|
1554
1561
|
sorted_logprobs = list(
|
|
1555
1562
|
sorted(
|
|
1556
1563
|
zip(current_logprobs, range(len(current_logprobs))),
|
|
@@ -1668,8 +1675,8 @@ class Llama:
|
|
|
1668
1675
|
self.detokenize(completion_tokens[:returned_tokens])
|
|
1669
1676
|
)
|
|
1670
1677
|
token_offset = len(prompt_tokens) + returned_tokens - 1
|
|
1671
|
-
logits = self._scores[token_offset, :]
|
|
1672
|
-
current_logprobs = Llama.logits_to_logprobs(logits)
|
|
1678
|
+
logits = self._scores[token_offset, :]
|
|
1679
|
+
current_logprobs = Llama.logits_to_logprobs(logits).tolist()
|
|
1673
1680
|
sorted_logprobs = list(
|
|
1674
1681
|
sorted(
|
|
1675
1682
|
zip(current_logprobs, range(len(current_logprobs))),
|
|
@@ -1782,16 +1789,21 @@ class Llama:
|
|
|
1782
1789
|
self.detokenize([token]).decode("utf-8", errors="ignore")
|
|
1783
1790
|
for token in all_tokens
|
|
1784
1791
|
]
|
|
1785
|
-
all_logprobs = [
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
all_tokens, all_token_strs, all_logprobs
|
|
1792
|
+
all_logprobs = Llama.logits_to_logprobs(self._scores)[token_offset:]
|
|
1793
|
+
# TODO: may be able to change this loop to use np.take_along_dim
|
|
1794
|
+
for idx, (token, token_str, logprobs_token) in enumerate(
|
|
1795
|
+
zip(all_tokens, all_token_strs, all_logprobs)
|
|
1790
1796
|
):
|
|
1791
1797
|
if token == self.token_bos():
|
|
1792
1798
|
continue
|
|
1793
|
-
text_offsets.append(
|
|
1794
|
-
|
|
1799
|
+
text_offsets.append(
|
|
1800
|
+
text_offset
|
|
1801
|
+
+ len(
|
|
1802
|
+
self.detokenize(all_tokens[:idx]).decode(
|
|
1803
|
+
"utf-8", errors="ignore"
|
|
1804
|
+
)
|
|
1805
|
+
)
|
|
1806
|
+
)
|
|
1795
1807
|
tokens.append(token_str)
|
|
1796
1808
|
sorted_logprobs = list(
|
|
1797
1809
|
sorted(
|
|
@@ -1905,7 +1917,7 @@ class Llama:
|
|
|
1905
1917
|
completion_or_chunks = self._create_completion(
|
|
1906
1918
|
prompt=prompt,
|
|
1907
1919
|
suffix=suffix,
|
|
1908
|
-
max_tokens
|
|
1920
|
+
max_tokens=-1 if max_tokens is None else max_tokens,
|
|
1909
1921
|
temperature=temperature,
|
|
1910
1922
|
top_p=top_p,
|
|
1911
1923
|
min_p=min_p,
|
|
@@ -1939,7 +1951,7 @@ class Llama:
|
|
|
1939
1951
|
self,
|
|
1940
1952
|
prompt: str,
|
|
1941
1953
|
suffix: Optional[str] = None,
|
|
1942
|
-
max_tokens: int =
|
|
1954
|
+
max_tokens: Optional[int] = 16,
|
|
1943
1955
|
temperature: float = 0.8,
|
|
1944
1956
|
top_p: float = 0.95,
|
|
1945
1957
|
min_p: float = 0.05,
|
|
@@ -2149,7 +2161,6 @@ class Llama:
|
|
|
2149
2161
|
yarn_beta_slow=self.context_params.yarn_beta_slow,
|
|
2150
2162
|
yarn_orig_ctx=self.context_params.yarn_orig_ctx,
|
|
2151
2163
|
mul_mat_q=self.context_params.mul_mat_q,
|
|
2152
|
-
f16_kv=self.context_params.f16_kv,
|
|
2153
2164
|
logits_all=self.context_params.logits_all,
|
|
2154
2165
|
embedding=self.context_params.embedding,
|
|
2155
2166
|
# Sampling Params
|
|
@@ -2192,7 +2203,6 @@ class Llama:
|
|
|
2192
2203
|
yarn_beta_slow=state["yarn_beta_slow"],
|
|
2193
2204
|
yarn_orig_ctx=state["yarn_orig_ctx"],
|
|
2194
2205
|
mul_mat_q=state["mul_mat_q"],
|
|
2195
|
-
f16_kv=state["f16_kv"],
|
|
2196
2206
|
logits_all=state["logits_all"],
|
|
2197
2207
|
embedding=state["embedding"],
|
|
2198
2208
|
# Sampling Params
|
|
@@ -2280,14 +2290,22 @@ class Llama:
|
|
|
2280
2290
|
return self._model.token_nl()
|
|
2281
2291
|
|
|
2282
2292
|
@staticmethod
|
|
2283
|
-
def logits_to_logprobs(
|
|
2284
|
-
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
|
|
2290
|
-
|
|
2293
|
+
def logits_to_logprobs(
|
|
2294
|
+
logits: Union[npt.NDArray[np.single], List], axis: int = -1
|
|
2295
|
+
) -> npt.NDArray[np.single]:
|
|
2296
|
+
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.log_softmax.html
|
|
2297
|
+
logits_maxs: np.ndarray = np.amax(logits, axis=axis, keepdims=True)
|
|
2298
|
+
if logits_maxs.ndim > 0:
|
|
2299
|
+
logits_maxs[~np.isfinite(logits_maxs)] = 0
|
|
2300
|
+
elif not np.isfinite(logits_maxs):
|
|
2301
|
+
logits_maxs = 0
|
|
2302
|
+
subtract_maxs = np.subtract(logits, logits_maxs, dtype=np.single)
|
|
2303
|
+
exp = np.exp(subtract_maxs)
|
|
2304
|
+
# Suppress warnings about log of zero
|
|
2305
|
+
with np.errstate(divide="ignore"):
|
|
2306
|
+
summed = np.sum(exp, axis=axis, keepdims=True)
|
|
2307
|
+
out = np.log(summed)
|
|
2308
|
+
return subtract_maxs - out
|
|
2291
2309
|
|
|
2292
2310
|
@staticmethod
|
|
2293
2311
|
def longest_token_prefix(a: Sequence[int], b: Sequence[int]):
|