llama-cpp-python 0.2.36__tar.gz → 0.2.38__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_cpp_python-0.2.38/.git/FETCH_HEAD +1 -0
- llama_cpp_python-0.2.38/.git/HEAD +1 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/config +1 -1
- llama_cpp_python-0.2.38/.git/index +0 -0
- llama_cpp_python-0.2.38/.git/logs/HEAD +1 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/HEAD +1 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/config +1 -1
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/logs/HEAD +2 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/logs/refs/heads/master +1 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +1 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/objects/pack/pack-840f4459d494ce7fd10b79596f309b54b31652b8.idx +0 -0
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/objects/pack/pack-45c5d7da4d130e32bb1f98f3b58ea9cd2784fad3.pack → llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/objects/pack/pack-840f4459d494ce7fd10b79596f309b54b31652b8.pack +0 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/objects/pack/pack-840f4459d494ce7fd10b79596f309b54b31652b8.rev +0 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/packed-refs +2 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/refs/heads/master +1 -0
- llama_cpp_python-0.2.38/.git/modules/vendor/llama.cpp/shallow +1 -0
- llama_cpp_python-0.2.38/.git/objects/pack/pack-a38d8e19feb0b2901a657ea0b79846878599b3fc.idx +0 -0
- llama_cpp_python-0.2.38/.git/objects/pack/pack-a38d8e19feb0b2901a657ea0b79846878599b3fc.pack +0 -0
- llama_cpp_python-0.2.38/.git/objects/pack/pack-a38d8e19feb0b2901a657ea0b79846878599b3fc.rev +0 -0
- llama_cpp_python-0.2.38/.git/refs/tags/v0.2.38 +1 -0
- llama_cpp_python-0.2.38/.git/shallow +1 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/CHANGELOG.md +11 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/Makefile +6 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/PKG-INFO +57 -22
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/README.md +56 -21
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/high_level_api/fastapi_server.py +1 -1
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/__init__.py +1 -1
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/_internals.py +2 -2
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/llama.py +127 -91
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/llama_chat_format.py +37 -10
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/llama_cpp.py +36 -7
- llama_cpp_python-0.2.38/llama_cpp/llama_speculative.py +64 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/server/model.py +9 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/server/settings.py +11 -2
- llama_cpp_python-0.2.38/tests/test_llama_speculative.py +16 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.ecrc +1 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/build.yml +47 -1
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/editorconfig.yml +6 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.gitignore +1 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/.gitmodules +3 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/CMakeLists.txt +169 -14
- llama_cpp_python-0.2.36/vendor/llama.cpp/README_sycl.md → llama_cpp_python-0.2.38/vendor/llama.cpp/README-sycl.md +184 -10
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/README.md +10 -8
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/build-info.cpp +1 -1
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/common.cpp +30 -28
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/common.h +34 -34
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/train.cpp +6 -6
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched-bench/batched-bench.cpp +1 -1
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama-bench/llama-bench.cpp +19 -12
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/MobileVLM-README.md +56 -2
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main/main.cpp +18 -10
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/quantize/quantize.cpp +2 -1
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/README.md +85 -45
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/chat.sh +1 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/server.cpp +73 -64
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/sycl/ls-sycl-device.cpp +6 -4
- llama_cpp_python-0.2.38/vendor/llama.cpp/examples/sycl/win-build-sycl.bat +23 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/examples/sycl/win-run-llama2.bat +13 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-alloc.c +1 -1
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-backend.c +5 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-cuda.cu +466 -76
- llama_cpp_python-0.2.38/vendor/llama.cpp/ggml-kompute.cpp +1990 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/ggml-kompute.h +46 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-metal.h +3 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-metal.m +81 -9
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-metal.metal +303 -4
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-quants.c +706 -15
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-quants.h +17 -1
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-sycl.cpp +11 -4
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-sycl.h +5 -4
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-vulkan-shaders.hpp +590 -1162
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-vulkan.cpp +263 -176
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml.c +272 -52
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml.h +5 -1
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml_vk_generate_shaders.py +2 -2
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/common.comp +102 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_add.comp +58 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_addrow.comp +25 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_cpy_f16_f16.comp +52 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_cpy_f16_f32.comp +52 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_cpy_f32_f16.comp +52 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_cpy_f32_f32.comp +52 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_diagmask.comp +30 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_gelu.comp +22 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_getrows.comp +17 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_getrows_f16.comp +31 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_getrows_q4_0.comp +38 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_getrows_q4_1.comp +39 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_getrows_q6_k.comp +44 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_mul.comp +52 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_mul_mat_f16.comp +67 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_mul_mat_q6_k.comp +94 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_mul_mv_q_n.comp +48 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_mul_mv_q_n_pre.comp +22 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_norm.comp +84 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_relu.comp +21 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_rmsnorm.comp +53 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_rope_f16.comp +73 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_rope_f32.comp +73 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_scale.comp +19 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_scale_8.comp +23 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_silu.comp +22 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/op_softmax.comp +56 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/kompute-shaders/rope_common.comp +67 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/llama.cpp +208 -136
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/llama.h +11 -18
- llama_cpp_python-0.2.38/vendor/llama.cpp/scripts/install-oneapi.bat +19 -0
- llama_cpp_python-0.2.38/vendor/llama.cpp/scripts/sync-ggml.last +1 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-backend-ops.cpp +496 -15
- llama_cpp_python-0.2.38/vendor/llama.cpp/tests/test-c.c +7 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-quantize-fns.cpp +10 -3
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-quantize-perf.cpp +2 -0
- llama_cpp_python-0.2.36/.git/FETCH_HEAD +0 -1
- llama_cpp_python-0.2.36/.git/HEAD +0 -1
- llama_cpp_python-0.2.36/.git/index +0 -0
- llama_cpp_python-0.2.36/.git/logs/HEAD +0 -1
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/HEAD +0 -1
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/index +0 -0
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/logs/HEAD +0 -2
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/logs/refs/heads/master +0 -1
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +0 -1
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/objects/pack/pack-45c5d7da4d130e32bb1f98f3b58ea9cd2784fad3.idx +0 -0
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/objects/pack/pack-45c5d7da4d130e32bb1f98f3b58ea9cd2784fad3.rev +0 -0
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/packed-refs +0 -2
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/refs/heads/master +0 -1
- llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/shallow +0 -1
- llama_cpp_python-0.2.36/.git/objects/02/c09afb0bf5559d3fe64ce67f4ff82af32ff50f +0 -0
- llama_cpp_python-0.2.36/.git/objects/03/667ba9b14656308c89e62f61377b295604a99d +0 -0
- llama_cpp_python-0.2.36/.git/objects/04/5856ea2ffe697ec33db1c1c989bd45cde5bb3d +0 -0
- llama_cpp_python-0.2.36/.git/objects/04/7bc14424303575f73af90611fec827334f54e8 +0 -0
- llama_cpp_python-0.2.36/.git/objects/0d/870969f4b23bd92a09ec29134d3fb454d38bec +0 -0
- llama_cpp_python-0.2.36/.git/objects/0e/2a907c0e40de1a432ee0cd2e3d01c634df1a37 +0 -0
- llama_cpp_python-0.2.36/.git/objects/11/2a0f7872fa9244bf38729a2722dc5c08dec20c +0 -0
- llama_cpp_python-0.2.36/.git/objects/11/b6d5c26e3f7157658952b8ec353e985d522fac +0 -1
- llama_cpp_python-0.2.36/.git/objects/13/c5b6b0df5f67e80cbe584909b83777901265a1 +0 -0
- llama_cpp_python-0.2.36/.git/objects/19/9bd4ffbf88c68c98b52c05f388dfa92716f6b7 +0 -0
- llama_cpp_python-0.2.36/.git/objects/1a/5152530cfbde487c928b60269a29fa5219f617 +0 -0
- llama_cpp_python-0.2.36/.git/objects/23/e37d4d40e5ec0bfd85b5e928834d58e2cf0da6 +0 -0
- llama_cpp_python-0.2.36/.git/objects/24/04228325d88c59b819acaedb20b15635f75df9 +0 -0
- llama_cpp_python-0.2.36/.git/objects/27/2e4767b47397d1529e6a2d01298144d9d746ba +0 -0
- llama_cpp_python-0.2.36/.git/objects/27/a6b1e5042318f85dc99c9f24f4dd6fc472d242 +0 -0
- llama_cpp_python-0.2.36/.git/objects/29/03e0146d304bcacbfabfe71f171a2edc03043e +0 -0
- llama_cpp_python-0.2.36/.git/objects/2c/c6fb02dcb3226c1ecf0fbed6453eb3acadeaa2 +0 -0
- llama_cpp_python-0.2.36/.git/objects/36/8022c457f6dd5bb566f7e2a21d88850bd98768 +0 -0
- llama_cpp_python-0.2.36/.git/objects/39/697bfc2538bd4558018d0d721d6d6028c4bb56 +0 -2
- llama_cpp_python-0.2.36/.git/objects/3a/1d7180d508818fe957923e00dcd8950938632d +0 -5
- llama_cpp_python-0.2.36/.git/objects/3a/6457dcdfd47e764654bacae0ba8347976b645a +0 -0
- llama_cpp_python-0.2.36/.git/objects/3d/d00767671c5e9dac5a2ab8f4f1331531294b60 +0 -0
- llama_cpp_python-0.2.36/.git/objects/41/3097201ac0fc27ef4dcf518b699a6fb610a54c +0 -0
- llama_cpp_python-0.2.36/.git/objects/41/cc68ea2402cf682807649d841e7c0f4175db01 +0 -0
- llama_cpp_python-0.2.36/.git/objects/46/4af5b39fea3cf1ba16e755a9df85f09bbb25ac +0 -3
- llama_cpp_python-0.2.36/.git/objects/47/4503fdfc554d8caabee4f321a80427f8c7d696 +0 -0
- llama_cpp_python-0.2.36/.git/objects/4a/106470b0b650cd76f2f5d00d744b615c72bed3 +0 -0
- llama_cpp_python-0.2.36/.git/objects/4b/3189dd1a54be3bc416786ddf184dd047dc4b20 +0 -0
- llama_cpp_python-0.2.36/.git/objects/51/f357200f8b998031f4be924e11ed2ae4bf3fea +0 -0
- llama_cpp_python-0.2.36/.git/objects/54/66de3a4e33002429b33db9668367bb32af67eb +0 -0
- llama_cpp_python-0.2.36/.git/objects/56/2410fe1a4c4093d62e1705933eaf66c6d99a2c +0 -2
- llama_cpp_python-0.2.36/.git/objects/5f/350ffe99abe7297b2839f36fd0eaeb98887805 +0 -0
- llama_cpp_python-0.2.36/.git/objects/60/bc7aef42aac0409cfdca666ad2ff6f516d7b5b +0 -0
- llama_cpp_python-0.2.36/.git/objects/61/027ef99725c50b0891fdbf0bf263a33abe648f +0 -0
- llama_cpp_python-0.2.36/.git/objects/68/7316b327ca038d26a338b3070a7d4698322515 +0 -0
- llama_cpp_python-0.2.36/.git/objects/74/739cbdeccf282532e1684caac6846aab3a7afe +0 -0
- llama_cpp_python-0.2.36/.git/objects/74/7c6130e3cb1479d20e2013b1dd3db3379c2266 +0 -1
- llama_cpp_python-0.2.36/.git/objects/76/e26fbd106895fba52861f8ac1e11cc6ee2a307 +0 -0
- llama_cpp_python-0.2.36/.git/objects/79/5dad726ae91f330c56e49bc188080b5b3b5ba9 +0 -0
- llama_cpp_python-0.2.36/.git/objects/79/a9e67a1aee09c6d182f240ba5eef32feabcbce +0 -3
- llama_cpp_python-0.2.36/.git/objects/7a/7b899ec7dd48d192cba14ac9c8e9df4233f7fb +0 -0
- llama_cpp_python-0.2.36/.git/objects/7a/bb04aa9c0f718d18a085923cdb87a670cb5437 +0 -0
- llama_cpp_python-0.2.36/.git/objects/7d/5498f9d2c49c1f731b47845d845f6c5e16a3d9 +0 -0
- llama_cpp_python-0.2.36/.git/objects/7d/6c970483161eaf43cfa9d50010c071d4953053 +0 -0
- llama_cpp_python-0.2.36/.git/objects/7e/df0975dc12ccc95ad14de085f07efe6d65c620 +0 -3
- llama_cpp_python-0.2.36/.git/objects/7e/e8f748eab47180cea09c0ad8e75c3b991b4af4 +0 -0
- llama_cpp_python-0.2.36/.git/objects/80/6b120c5e62a7be5fecc631e341d1856dac79a5 +0 -2
- llama_cpp_python-0.2.36/.git/objects/80/7b0f57a8a873e58ade0ff0f5b0bcf0ff66b7f9 +0 -0
- llama_cpp_python-0.2.36/.git/objects/82/e5c4487e57d6d59c901a73bdd2a9bc172fee7c +0 -3
- llama_cpp_python-0.2.36/.git/objects/8b/d03f88a1895cbf3ef249e13df79ee0efda779d +0 -2
- llama_cpp_python-0.2.36/.git/objects/91/abb11fdf507883caeeb2d2958e1c65fb6cbdc1 +0 -0
- llama_cpp_python-0.2.36/.git/objects/99/32d6130f5552a9b85c8b15b4ac6bc26b1068ce +0 -0
- llama_cpp_python-0.2.36/.git/objects/9d/0ec2f705618e591cfa8d6512cb9a96b3da75f1 +0 -0
- llama_cpp_python-0.2.36/.git/objects/9d/3d3559849603efda6f3c8181684e4d19e0ec79 +0 -0
- llama_cpp_python-0.2.36/.git/objects/9e/9870a52245d4f245df5a4e1b89fda121d78214 +0 -0
- llama_cpp_python-0.2.36/.git/objects/9f/0dc8a73c5bd91cea7580e9ea733f33528162db +0 -0
- llama_cpp_python-0.2.36/.git/objects/a4/5e5d77363eb85144d4aff1a3cbe86ce94d5c92 +0 -0
- llama_cpp_python-0.2.36/.git/objects/a9/e51cdc1672134ec9af66c9eccf09f6da4ceccd +0 -0
- llama_cpp_python-0.2.36/.git/objects/ac/82b8fbb81087ec9b3a72d9e377102a31b28d16 +0 -0
- llama_cpp_python-0.2.36/.git/objects/ac/aabd74d800e2a957097d56acd931e72129e7f0 +0 -0
- llama_cpp_python-0.2.36/.git/objects/b1/f90b91594f496ad9f27b1a68584984f4b523d2 +0 -0
- llama_cpp_python-0.2.36/.git/objects/b9/1632f5bc787c1404600c894a6a4126359747d8 +0 -0
- llama_cpp_python-0.2.36/.git/objects/bb/b68069d5f8125d64af0baffbd4695f1a0f729c +0 -0
- llama_cpp_python-0.2.36/.git/objects/bb/cbbe7d61558adde3cbfd0c7a63a67c27ed6d30 +0 -0
- llama_cpp_python-0.2.36/.git/objects/c1/0aee42e0da547428df7cac9845e246badf1803 +0 -0
- llama_cpp_python-0.2.36/.git/objects/c3/deba87b41117da6624f6d92a016ce50239dfc3 +0 -0
- llama_cpp_python-0.2.36/.git/objects/c4/256dd4ce7908b76e2215415b9bba7911a8deee +0 -0
- llama_cpp_python-0.2.36/.git/objects/c8/5e73d2b657bb05ed99309615d67bac93d9f86e +0 -1
- llama_cpp_python-0.2.36/.git/objects/ca/ebbb67fdb02a0a8897d4e4826ea046a9931f6f +0 -0
- llama_cpp_python-0.2.36/.git/objects/cb/221880a66e3c1f2ca15a9df52ac4bcb765e7d4 +0 -0
- llama_cpp_python-0.2.36/.git/objects/cc/6a3a7252ea6e698614f0629d4bc040ab6ca717 +0 -0
- llama_cpp_python-0.2.36/.git/objects/cd/351ba33849dcf6af35b493f7405962fa1625d4 +0 -0
- llama_cpp_python-0.2.36/.git/objects/d1/ae9b564d3ab02c6b91162e52d822d36524edb3 +0 -0
- llama_cpp_python-0.2.36/.git/objects/d3/329eec3bac6ce7e54c76b77ac9bf99fab0fe3f +0 -0
- llama_cpp_python-0.2.36/.git/objects/d3/efb3a6fe1e001db62ec08e5d31ce1d08567045 +0 -0
- llama_cpp_python-0.2.36/.git/objects/d8/ef563c2dc69fe1ea223be7bc5b0efc27ad1f9c +0 -0
- llama_cpp_python-0.2.36/.git/objects/da/c33b74dddf06fcfc01244044eebb102cfcea37 +0 -0
- llama_cpp_python-0.2.36/.git/objects/e2/13518b95011cb6ee783986624c3b6de8659f81 +0 -0
- llama_cpp_python-0.2.36/.git/objects/e4/1f375774e6945e445bfb179502b128fe22dda7 +0 -0
- llama_cpp_python-0.2.36/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
- llama_cpp_python-0.2.36/.git/objects/e9/9dd1767bb3a30508e8a0de0e41bd426c13c387 +0 -0
- llama_cpp_python-0.2.36/.git/objects/eb/0fb9662e690d0f9de4632cddd321b3f872a725 +0 -0
- llama_cpp_python-0.2.36/.git/objects/eb/9a2cfa9167df02f136502af79738c71363abfd +0 -0
- llama_cpp_python-0.2.36/.git/objects/ec/47c421648c0a57c37cef86f4c5c435fb3b81ff +0 -0
- llama_cpp_python-0.2.36/.git/objects/ef/1b2c0162e8edd321e2b9c1ce375d96f1f1d048 +0 -0
- llama_cpp_python-0.2.36/.git/objects/f0/827d762e852a21f6406c469300899d5f509b8f +0 -0
- llama_cpp_python-0.2.36/.git/objects/f1/b8e9d154231932c4b7b9b59611626764e68632 +0 -0
- llama_cpp_python-0.2.36/.git/objects/f2/0813765a70679e8a063871c9ef75d75c65ccb6 +0 -0
- llama_cpp_python-0.2.36/.git/objects/f6/66a05325305ef8813f914ad5b909541bc8f786 +0 -0
- llama_cpp_python-0.2.36/.git/objects/f7/3f3d42f48aa84ceb664f490290d1840f6873e6 +0 -0
- llama_cpp_python-0.2.36/.git/objects/f7/9baa89ba3d84bc4b49e8ed314c018b9e4d4bbc +0 -0
- llama_cpp_python-0.2.36/.git/objects/fa/dfc5fb4fe6f5eb6d5d98b62519e374a5202b00 +0 -0
- llama_cpp_python-0.2.36/.git/objects/fa/f4a87a2e4515dbf0dbea0cc64f91fc7b81b8bb +0 -0
- llama_cpp_python-0.2.36/.git/objects/fc/25ff5160028dee3570249abc40cd57780bcca9 +0 -0
- llama_cpp_python-0.2.36/.git/objects/fc/ef8cd800ee8a265b146748d178cb56b5632bf3 +0 -0
- llama_cpp_python-0.2.36/.git/objects/fd/64c09b37947c97e58903ce570785c657d56722 +0 -0
- llama_cpp_python-0.2.36/.git/objects/fe/b0ed68d94eac48b844fd587ddfb808649716a1 +0 -1
- llama_cpp_python-0.2.36/.git/objects/ff/3e950cd1110fe552912cea4c268c4023d2b737 +0 -3
- llama_cpp_python-0.2.36/.git/refs/tags/v0.2.36 +0 -1
- llama_cpp_python-0.2.36/.git/shallow +0 -1
- llama_cpp_python-0.2.36/vendor/llama.cpp/scripts/sync-ggml.last +0 -1
- llama_cpp_python-0.2.36/vendor/llama.cpp/tests/test-c.c +0 -3
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.dockerignore +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/description +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/info/exclude +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/description +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/applypatch-msg.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/commit-msg.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/fsmonitor-watchman.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/post-update.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-applypatch.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-commit.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-merge-commit.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-push.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-rebase.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/pre-receive.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/prepare-commit-msg.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/push-to-checkout.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/sendemail-validate.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/hooks/update.sample +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/info/exclude +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.git/modules/vendor/llama.cpp/refs/remotes/origin/HEAD +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.github/dependabot.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.github/workflows/build-and-release.yaml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.github/workflows/build-docker.yaml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.github/workflows/publish-to-test.yaml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.github/workflows/publish.yaml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.github/workflows/test-pypi.yaml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.github/workflows/test.yaml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.gitignore +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.gitmodules +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/.readthedocs.yaml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/LICENSE.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docker/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docker/cuda_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docker/open_llama/Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docker/open_llama/build.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docker/open_llama/hug_model.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docker/open_llama/start.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docker/open_llama/start_server.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docker/openblas_simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docker/simple/Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docker/simple/run.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docs/api-reference.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docs/changelog.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docs/index.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docs/install/macos.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docs/requirements.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/docs/server.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/high_level_api/high_level_api_embedding.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/high_level_api/high_level_api_inference.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/high_level_api/high_level_api_streaming.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/high_level_api/langchain_custom_llm.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/low_level_api/Chat.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/low_level_api/Miku.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/low_level_api/ReasonAct.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/low_level_api/common.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/low_level_api/low_level_api_chat_cpp.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/low_level_api/low_level_api_llama_cpp.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/low_level_api/quantize.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/low_level_api/readme/low_level_api_llama_cpp.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/low_level_api/util.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/notebooks/Batching.ipynb +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/notebooks/Clients.ipynb +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/notebooks/Functions.ipynb +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/notebooks/Guidance.ipynb +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/notebooks/Multimodal.ipynb +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/examples/notebooks/PerformanceTuning.ipynb +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/_utils.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/llama_cache.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/llama_grammar.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/llama_types.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/llava_cpp.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/py.typed +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/server/__init__.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/server/__main__.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/server/app.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/server/cli.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/server/errors.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/llama_cpp/server/types.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/mkdocs.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/pyproject.toml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/tests/test_grammar.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/tests/test_llama.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/tests/test_llama_chat_format.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.clang-tidy +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/cloud-v-pipeline +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/full-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/full-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/main-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/main-intel.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/main-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/apps.nix +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/devshells.nix +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/jetson-support.nix +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/nixpkgs-instances.nix +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/package.nix +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/nix/scope.nix +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/server-cuda.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/server-intel.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/server-rocm.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/server.Dockerfile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.devops/tools.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.dockerignore +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.editorconfig +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.flake8 +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.git +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/enhancement.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/code-coverage.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/docker.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/gguf-publish.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/nix-ci.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/nix-flake-update.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/python-check-requirements.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/python-lint.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/tidy-post.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/tidy-review.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.github/workflows/zig-build.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/.pre-commit-config.yaml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/LICENSE +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/Makefile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/Package.swift +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/SHA256SUMS +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/awq-py/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/awq-py/awq/apply_awq.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/awq-py/requirements.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/build.zig +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ci/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ci/run.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/cmake/FindSIMD.cmake +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/codecov.yml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/base64.hpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/build-info.cpp.in +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/console.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/console.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/grammar-parser.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/log.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/sampling.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/sampling.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/stb_image.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/common/train.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/convert-hf-to-gguf.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/convert-llama-ggml-to-gguf.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/convert-lora-to-ggml.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/convert-persimmon-to-gguf.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/convert.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/docs/BLIS.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/docs/llama-star/idea-arch.key +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/docs/llama-star/idea-arch.pdf +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/docs/token_generation_performance_tips.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/Miku.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/alpaca.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/base-translate.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched/batched.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched-bench/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched.swift/.gitignore +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched.swift/Makefile +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched.swift/Package.swift +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched.swift/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/batched.swift/Sources/main.swift +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/beam-search/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/beam-search/beam-search.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/benchmark/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/chat-13B.bat +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/chat-13B.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/chat-persistent.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/chat-vicuna.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/chat.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/embedding/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/embedding/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/embedding/embedding.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/export-lora/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/export-lora/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/export-lora/export-lora.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/finetune/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/finetune/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/finetune/convert-finetune-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/finetune/finetune.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/finetune/finetune.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/gguf/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/gguf/gguf.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/gpt4all.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/imatrix/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/imatrix/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/imatrix/imatrix.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/infill/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/infill/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/infill/infill.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/jeopardy/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/jeopardy/graph.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/jeopardy/jeopardy.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/jeopardy/qasheet.csv +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/jeopardy/questions.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/json-schema-to-grammar.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama-bench/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama-bench/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/.gitignore +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/.gitignore +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/proguard-rules.pro +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/AndroidManifest.xml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/Downloadable.kt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/MainActivity.kt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/MainViewModel.kt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Color.kt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Theme.kt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Type.kt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/drawable/ic_launcher_background.xml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/drawable/ic_launcher_foreground.xml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-anydpi/ic_launcher.xml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-hdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-mdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/values/colors.xml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/values/strings.xml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/values/themes.xml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/xml/backup_rules.xml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/app/src/main/res/xml/data_extraction_rules.xml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/build.gradle.kts +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/gradle/wrapper/gradle-wrapper.jar +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/gradle/wrapper/gradle-wrapper.properties +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/gradle.properties +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/gradlew +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.android/settings.gradle.kts +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/.gitignore +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/InputButton.swift +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama.vim +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama2-13b.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llama2.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/android/adb_run.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/android/build_64.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/clip.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/clip.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/convert-image-encoder-to-gguf.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/llava-cli.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/llava-surgery.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/llava.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llava/llava.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/llm.vim +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookahead/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookahead/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookahead/lookahead.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookup/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookup/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/lookup/lookup.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main-cmake-pkg/.gitignore +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/main-cmake-pkg/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/make-ggml.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/parallel/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/parallel/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/parallel/parallel.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/passkey/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/passkey/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/passkey/passkey.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/perplexity/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/perplexity/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/perplexity/perplexity.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/pydantic-models-to-grammar-examples.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/pydantic_models_to_grammar.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/quantize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/quantize/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/reason-act.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/api_like_OAI.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/chat-llama2.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/chat.mjs +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/completion.js.hpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/deps.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/httplib.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/index.html.hpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/index.js.hpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/json-schema-to-grammar.mjs.hpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/json.hpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/oai.hpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/public/completion.js +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/public/index.html +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/public/index.js +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/public/json-schema-to-grammar.mjs +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server/utils.hpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/server-llama2-13B.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/simple/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/simple/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/simple/simple.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/speculative/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/speculative/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/speculative/speculative.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/sycl/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/sycl/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/sycl/build.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/sycl/run-llama2.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/tokenize/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/tokenize/tokenize.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/train-text-from-scratch/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/flake.lock +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/flake.nix +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-alloc.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-backend-impl.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-backend.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-cuda.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-impl.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-mpi.c +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-mpi.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-opencl.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-opencl.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/ggml-vulkan.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/LICENSE +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/examples/writer.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/__init__.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/constants.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/gguf.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/gguf_reader.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/gguf_writer.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/py.typed +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/tensor_mapping.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/gguf/vocab.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/pyproject.toml +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/scripts/__init__.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/scripts/gguf-convert-endian.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/scripts/gguf-dump.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/scripts/gguf-set-metadata.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/gguf-py/tests/test_gguf.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/README.md +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/arithmetic.gbnf +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/c.gbnf +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/chess.gbnf +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/japanese.gbnf +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/json.gbnf +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/json_arr.gbnf +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/grammars/list.gbnf +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/media/llama-leader.jpeg +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/media/llama0-banner.png +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/media/llama0-logo.png +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/media/llama1-banner.png +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/media/llama1-logo.png +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/mypy.ini +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/pocs/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/pocs/vdot/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/pocs/vdot/q8dot.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/pocs/vdot/vdot.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/LLM-questions.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/assistant.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat-with-baichuan.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat-with-qwen.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/chat.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/dan-modified.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/dan.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/mnemonics.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/parallel-questions.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/prompts/reason-act.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements/requirements-convert.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/requirements.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/LlamaConfig.cmake.in +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/build-info.cmake +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/build-info.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/check-requirements.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/ci-run.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/compare-llama-bench.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/convert-gg.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/get-flags.mk +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/get-hellaswag.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/get-pg.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/get-wikitext-2.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/get-winogrande.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/qnt-all.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/run-all-perf.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/run-all-ppl.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/run-with-preset.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/server-llm.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/sync-ggml-am.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/sync-ggml.sh +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/scripts/verify-checksum-models.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/spm-headers/llama.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/.gitignore +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/CMakeLists.txt +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/get-model.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/get-model.h +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-autorelease.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-double-float.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-grad0.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-grammar-parser.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-llama-grammar.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-model-load-cancel.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-opt.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-rope.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-sampling.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-0-llama.py +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/tests/test-tokenizer-1-llama.cpp +0 -0
- {llama_cpp_python-0.2.36 → llama_cpp_python-0.2.38}/vendor/llama.cpp/unicode.h +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3322eadbf30a68731f6aafe0b4d055255b46d8f7 '3322eadbf30a68731f6aafe0b4d055255b46d8f7' of https://github.com/abetlen/llama-cpp-python
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3322eadbf30a68731f6aafe0b4d055255b46d8f7
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
[gc]
|
|
10
10
|
auto = 0
|
|
11
11
|
[http "https://github.com/"]
|
|
12
|
-
extraheader = AUTHORIZATION: basic
|
|
12
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX2ptaUFjRVlmS1RIZmJGRE9hdnltMDZJQ0p2MGVoTjFxOGFWNQ==
|
|
13
13
|
[submodule "vendor/llama.cpp"]
|
|
14
14
|
active = true
|
|
15
15
|
url = https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 3322eadbf30a68731f6aafe0b4d055255b46d8f7 runner <runner@fv-az1016-588.pv3vitign2bulj5h5vrau5ekvd.cx.internal.cloudapp.net> 1706732571 +0000 checkout: moving from master to refs/tags/v0.2.38
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
[gc]
|
|
14
14
|
auto = 0
|
|
15
15
|
[http "https://github.com/"]
|
|
16
|
-
extraheader = AUTHORIZATION: basic
|
|
16
|
+
extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX2ptaUFjRVlmS1RIZmJGRE9hdnltMDZJQ0p2MGVoTjFxOGFWNQ==
|
|
17
17
|
[url "https://github.com/"]
|
|
18
18
|
insteadOf = git@github.com:
|
|
19
19
|
insteadOf = org-6826477@github.com:
|
|
Binary file
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 runner <runner@fv-az1016-588.pv3vitign2bulj5h5vrau5ekvd.cx.internal.cloudapp.net> 1706732572 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
2
|
+
1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 runner <runner@fv-az1016-588.pv3vitign2bulj5h5vrau5ekvd.cx.internal.cloudapp.net> 1706732572 +0000 checkout: moving from master to 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 runner <runner@fv-az1016-588.pv3vitign2bulj5h5vrau5ekvd.cx.internal.cloudapp.net> 1706732572 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0000000000000000000000000000000000000000 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 runner <runner@fv-az1016-588.pv3vitign2bulj5h5vrau5ekvd.cx.internal.cloudapp.net> 1706732572 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3322eadbf30a68731f6aafe0b4d055255b46d8f7
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3322eadbf30a68731f6aafe0b4d055255b46d8f7
|
|
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.38]
|
|
11
|
+
|
|
12
|
+
- feat: Update llama.cpp to ggerganov/llama.cpp@1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
|
|
13
|
+
- feat: Add speculative decoding by @abetlen in #1120
|
|
14
|
+
- fix: Pass raise_exception and add_generation_prompt to jinja2 chat template 078cca0361bf5a94d2cf52ed04980d20e32d6f95
|
|
15
|
+
|
|
16
|
+
## [0.2.37]
|
|
17
|
+
|
|
18
|
+
- feat: Update llama.cpp to ggerganov/llama.cpp@fea4fd4ba7f6b754ac795387b275e1a014a77bde
|
|
19
|
+
- feat: Automatically set chat format from gguf by @abetlen in #1110
|
|
20
|
+
|
|
10
21
|
## [0.2.36]
|
|
11
22
|
|
|
12
23
|
- feat: Update llama.cpp to ggerganov/llama.cpp@2aed77eb06a329f0d82bb1c467f4244904d4073f
|
|
@@ -30,6 +30,12 @@ build.metal:
|
|
|
30
30
|
build.vulkan:
|
|
31
31
|
CMAKE_ARGS="-DLLAMA_VULKAN=on" python3 -m pip install --verbose -e .
|
|
32
32
|
|
|
33
|
+
build.kompute:
|
|
34
|
+
CMAKE_ARGS="-DLLAMA_KOMPUTE=on" python3 -m pip install --verbose -e .
|
|
35
|
+
|
|
36
|
+
build.sycl:
|
|
37
|
+
CMAKE_ARGS="-DLLAMA_SYCL=on" python3 -m pip install --verbose -e .
|
|
38
|
+
|
|
33
39
|
build.sdist:
|
|
34
40
|
python3 -m build --sdist
|
|
35
41
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: llama_cpp_python
|
|
3
|
-
Version: 0.2.36
|
|
3
|
+
Version: 0.2.38
|
|
4
4
|
Summary: Python bindings for the llama.cpp library
|
|
5
5
|
Author-Email: Andrei Betlen <abetlen@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -55,20 +55,17 @@ This package provides:
|
|
|
55
55
|
|
|
56
56
|
- Low-level access to C API via `ctypes` interface.
|
|
57
57
|
- High-level Python API for text completion
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
58
|
+
- OpenAI-like API
|
|
59
|
+
- [LangChain compatibility](https://python.langchain.com/docs/integrations/llms/llamacpp)
|
|
60
|
+
- [LlamaIndex compatibility](https://docs.llamaindex.ai/en/stable/examples/llm/llama_2_llama_cpp.html)
|
|
61
61
|
- OpenAI compatible web server
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
62
|
+
- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
|
|
63
|
+
- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
|
|
64
|
+
- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
|
|
65
|
+
- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
|
|
66
66
|
|
|
67
67
|
Documentation is available at [https://llama-cpp-python.readthedocs.io/en/latest](https://llama-cpp-python.readthedocs.io/en/latest).
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
69
|
## Installation
|
|
73
70
|
|
|
74
71
|
`llama-cpp-python` can be installed directly from PyPI as a source distribution by running:
|
|
@@ -81,7 +78,6 @@ This will build `llama.cpp` from source using cmake and your system's c compiler
|
|
|
81
78
|
|
|
82
79
|
If you run into issues during installation add the `--verbose` flag to the `pip install` command to see the full cmake build log.
|
|
83
80
|
|
|
84
|
-
|
|
85
81
|
### Installation with Specific Hardware Acceleration (BLAS, CUDA, Metal, etc)
|
|
86
82
|
|
|
87
83
|
The default pip install behaviour is to build `llama.cpp` for CPU only on Linux and Windows and use Metal on MacOS.
|
|
@@ -114,7 +110,7 @@ CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-
|
|
|
114
110
|
|
|
115
111
|
#### cuBLAS
|
|
116
112
|
|
|
117
|
-
To install with cuBLAS, set the `LLAMA_CUBLAS=
|
|
113
|
+
To install with cuBLAS, set the `LLAMA_CUBLAS=on` environment variable before installing:
|
|
118
114
|
|
|
119
115
|
```bash
|
|
120
116
|
CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
|
|
@@ -130,7 +126,7 @@ CMAKE_ARGS="-DLLAMA_METAL=on" pip install llama-cpp-python
|
|
|
130
126
|
|
|
131
127
|
#### CLBlast
|
|
132
128
|
|
|
133
|
-
To install with CLBlast, set the `LLAMA_CLBLAST=
|
|
129
|
+
To install with CLBlast, set the `LLAMA_CLBLAST=on` environment variable before installing:
|
|
134
130
|
|
|
135
131
|
```bash
|
|
136
132
|
CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python
|
|
@@ -144,13 +140,37 @@ To install with hipBLAS / ROCm support for AMD cards, set the `LLAMA_HIPBLAS=on`
|
|
|
144
140
|
CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python
|
|
145
141
|
```
|
|
146
142
|
|
|
143
|
+
#### Vulkan
|
|
144
|
+
|
|
145
|
+
To install with Vulkan support, set the `LLAMA_VULKAN=on` environment variable before installing:
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
CMAKE_ARGS="-DLLAMA_VULKAN=on" pip install llama-cpp-python
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
#### Kompute
|
|
152
|
+
|
|
153
|
+
To install with Kompute support, set the `LLAMA_KOMPUTE=on` environment variable before installing:
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
CMAKE_ARGS="-DLLAMA_KOMPUTE=on" pip install llama-cpp-python
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
#### SYCL
|
|
160
|
+
|
|
161
|
+
To install with SYCL support, set the `LLAMA_SYCL=on` environment variable before installing:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
CMAKE_ARGS="-DLLAMA_SYCL=on" pip install llama-cpp-python
|
|
165
|
+
```
|
|
166
|
+
|
|
147
167
|
### Windows Notes
|
|
148
168
|
|
|
149
169
|
If you run into issues where it complains it can't find `'nmake'` `'?'` or CMAKE_C_COMPILER, you can extract w64devkit as [mentioned in llama.cpp repo](https://github.com/ggerganov/llama.cpp#openblas) and add those manually to CMAKE_ARGS before running `pip` install:
|
|
150
170
|
|
|
151
171
|
```ps
|
|
152
172
|
$env:CMAKE_GENERATOR = "MinGW Makefiles"
|
|
153
|
-
$env:CMAKE_ARGS = "-DLLAMA_OPENBLAS=on -DCMAKE_C_COMPILER=C:/w64devkit/bin/gcc.exe -DCMAKE_CXX_COMPILER=C:/w64devkit/bin/g++.exe"
|
|
173
|
+
$env:CMAKE_ARGS = "-DLLAMA_OPENBLAS=on -DCMAKE_C_COMPILER=C:/w64devkit/bin/gcc.exe -DCMAKE_CXX_COMPILER=C:/w64devkit/bin/g++.exe"
|
|
154
174
|
```
|
|
155
175
|
|
|
156
176
|
See the above instructions and set `CMAKE_ARGS` to the BLAS backend you want to use.
|
|
@@ -200,7 +220,7 @@ Below is a short example demonstrating how to use the high-level API to for basi
|
|
|
200
220
|
>>> from llama_cpp import Llama
|
|
201
221
|
>>> llm = Llama(
|
|
202
222
|
model_path="./models/7B/llama-model.gguf",
|
|
203
|
-
# n_gpu_layers=-1, # Uncomment to use GPU acceleration
|
|
223
|
+
# n_gpu_layers=-1, # Uncomment to use GPU acceleration
|
|
204
224
|
# seed=1337, # Uncomment to set a specific seed
|
|
205
225
|
# n_ctx=2048, # Uncomment to increase the context window
|
|
206
226
|
)
|
|
@@ -319,7 +339,6 @@ The high-level API also provides a simple interface for function calling.
|
|
|
319
339
|
Note that the only model that supports full function calling at this time is "functionary".
|
|
320
340
|
The gguf-converted files for this model can be found here: [functionary-7b-v1](https://huggingface.co/abetlen/functionary-7b-v1-GGUF)
|
|
321
341
|
|
|
322
|
-
|
|
323
342
|
```python
|
|
324
343
|
>>> from llama_cpp import Llama
|
|
325
344
|
>>> llm = Llama(model_path="path/to/functionary/llama-model.gguf", chat_format="functionary")
|
|
@@ -328,7 +347,7 @@ The gguf-converted files for this model can be found here: [functionary-7b-v1](h
|
|
|
328
347
|
{
|
|
329
348
|
"role": "system",
|
|
330
349
|
"content": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary"
|
|
331
|
-
|
|
350
|
+
|
|
332
351
|
},
|
|
333
352
|
{
|
|
334
353
|
"role": "user",
|
|
@@ -367,7 +386,6 @@ The gguf-converted files for this model can be found here: [functionary-7b-v1](h
|
|
|
367
386
|
|
|
368
387
|
### Multi-modal Models
|
|
369
388
|
|
|
370
|
-
|
|
371
389
|
`llama-cpp-python` supports the llava1.5 family of multi-modal models which allow the language model to
|
|
372
390
|
read information from both text and images.
|
|
373
391
|
|
|
@@ -403,6 +421,24 @@ Then you'll need to use a custom chat handler to load the clip model and process
|
|
|
403
421
|
)
|
|
404
422
|
```
|
|
405
423
|
|
|
424
|
+
### Speculative Decoding
|
|
425
|
+
|
|
426
|
+
`llama-cpp-python` supports speculative decoding which allows the model to generate completions based on a draft model.
|
|
427
|
+
|
|
428
|
+
The fastest way to use speculative decoding is through the `LlamaPromptLookupDecoding` class.
|
|
429
|
+
|
|
430
|
+
Just pass this as a draft model to the `Llama` class during initialization.
|
|
431
|
+
|
|
432
|
+
```python
|
|
433
|
+
from llama_cpp import Llama
|
|
434
|
+
from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
|
|
435
|
+
|
|
436
|
+
llama = Llama(
|
|
437
|
+
model_path="path/to/model.gguf",
|
|
438
|
+
draft_model=LlamaPromptLookupDecoding(num_pred_tokens=10) # num_pred_tokens is the number of tokens to predict; 10 is the default and generally good for GPU, while 2 performs better on CPU-only machines.
|
|
439
|
+
)
|
|
440
|
+
```
|
|
441
|
+
|
|
406
442
|
### Adjusting the Context Window
|
|
407
443
|
|
|
408
444
|
The context window of the Llama models determines the maximum number of tokens that can be processed at once. By default, this is set to 512 tokens, but can be adjusted based on your requirements.
|
|
@@ -413,7 +449,6 @@ For instance, if you want to work with larger contexts, you can expand the conte
|
|
|
413
449
|
llm = Llama(model_path="./models/7B/llama-model.gguf", n_ctx=2048)
|
|
414
450
|
```
|
|
415
451
|
|
|
416
|
-
|
|
417
452
|
## OpenAI Compatible Web Server
|
|
418
453
|
|
|
419
454
|
`llama-cpp-python` offers a web server which aims to act as a drop-in replacement for the OpenAI API.
|
|
@@ -461,7 +496,8 @@ A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python).
|
|
|
461
496
|
```bash
|
|
462
497
|
docker run --rm -it -p 8000:8000 -v /path/to/models:/models -e MODEL=/models/llama-model.gguf ghcr.io/abetlen/llama-cpp-python:latest
|
|
463
498
|
```
|
|
464
|
-
|
|
499
|
+
|
|
500
|
+
[Docker on termux (requires root)](https://gist.github.com/FreddieOliveira/efe850df7ff3951cb62d74bd770dce27) is currently the only known way to run this on phones; see the [termux support issue](https://github.com/abetlen/llama-cpp-python/issues/389).
|
|
465
501
|
|
|
466
502
|
## Low-level API
|
|
467
503
|
|
|
@@ -489,7 +525,6 @@ Below is a short example demonstrating how to use the low-level API to tokenize
|
|
|
489
525
|
|
|
490
526
|
Check out the [examples folder](examples/low_level_api) for more examples of using the low-level API.
|
|
491
527
|
|
|
492
|
-
|
|
493
528
|
## Documentation
|
|
494
529
|
|
|
495
530
|
Documentation is available via [https://llama-cpp-python.readthedocs.io/](https://llama-cpp-python.readthedocs.io/).
|
|
@@ -12,20 +12,17 @@ This package provides:
|
|
|
12
12
|
|
|
13
13
|
- Low-level access to C API via `ctypes` interface.
|
|
14
14
|
- High-level Python API for text completion
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
- OpenAI-like API
|
|
16
|
+
- [LangChain compatibility](https://python.langchain.com/docs/integrations/llms/llamacpp)
|
|
17
|
+
- [LlamaIndex compatibility](https://docs.llamaindex.ai/en/stable/examples/llm/llama_2_llama_cpp.html)
|
|
18
18
|
- OpenAI compatible web server
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
19
|
+
- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
|
|
20
|
+
- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
|
|
21
|
+
- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
|
|
22
|
+
- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
|
|
23
23
|
|
|
24
24
|
Documentation is available at [https://llama-cpp-python.readthedocs.io/en/latest](https://llama-cpp-python.readthedocs.io/en/latest).
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
26
|
## Installation
|
|
30
27
|
|
|
31
28
|
`llama-cpp-python` can be installed directly from PyPI as a source distribution by running:
|
|
@@ -38,7 +35,6 @@ This will build `llama.cpp` from source using cmake and your system's c compiler
|
|
|
38
35
|
|
|
39
36
|
If you run into issues during installation add the `--verbose` flag to the `pip install` command to see the full cmake build log.
|
|
40
37
|
|
|
41
|
-
|
|
42
38
|
### Installation with Specific Hardware Acceleration (BLAS, CUDA, Metal, etc)
|
|
43
39
|
|
|
44
40
|
The default pip install behaviour is to build `llama.cpp` for CPU only on Linux and Windows and use Metal on MacOS.
|
|
@@ -71,7 +67,7 @@ CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-
|
|
|
71
67
|
|
|
72
68
|
#### cuBLAS
|
|
73
69
|
|
|
74
|
-
To install with cuBLAS, set the `LLAMA_CUBLAS=
|
|
70
|
+
To install with cuBLAS, set the `LLAMA_CUBLAS=on` environment variable before installing:
|
|
75
71
|
|
|
76
72
|
```bash
|
|
77
73
|
CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
|
|
@@ -87,7 +83,7 @@ CMAKE_ARGS="-DLLAMA_METAL=on" pip install llama-cpp-python
|
|
|
87
83
|
|
|
88
84
|
#### CLBlast
|
|
89
85
|
|
|
90
|
-
To install with CLBlast, set the `LLAMA_CLBLAST=
|
|
86
|
+
To install with CLBlast, set the `LLAMA_CLBLAST=on` environment variable before installing:
|
|
91
87
|
|
|
92
88
|
```bash
|
|
93
89
|
CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python
|
|
@@ -101,13 +97,37 @@ To install with hipBLAS / ROCm support for AMD cards, set the `LLAMA_HIPBLAS=on`
|
|
|
101
97
|
CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python
|
|
102
98
|
```
|
|
103
99
|
|
|
100
|
+
#### Vulkan
|
|
101
|
+
|
|
102
|
+
To install with Vulkan support, set the `LLAMA_VULKAN=on` environment variable before installing:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
CMAKE_ARGS="-DLLAMA_VULKAN=on" pip install llama-cpp-python
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
#### Kompute
|
|
109
|
+
|
|
110
|
+
To install with Kompute support, set the `LLAMA_KOMPUTE=on` environment variable before installing:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
CMAKE_ARGS="-DLLAMA_KOMPUTE=on" pip install llama-cpp-python
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
#### SYCL
|
|
117
|
+
|
|
118
|
+
To install with SYCL support, set the `LLAMA_SYCL=on` environment variable before installing:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
CMAKE_ARGS="-DLLAMA_SYCL=on" pip install llama-cpp-python
|
|
122
|
+
```
|
|
123
|
+
|
|
104
124
|
### Windows Notes
|
|
105
125
|
|
|
106
126
|
If you run into issues where it complains it can't find `'nmake'` `'?'` or CMAKE_C_COMPILER, you can extract w64devkit as [mentioned in llama.cpp repo](https://github.com/ggerganov/llama.cpp#openblas) and add those manually to CMAKE_ARGS before running `pip` install:
|
|
107
127
|
|
|
108
128
|
```ps
|
|
109
129
|
$env:CMAKE_GENERATOR = "MinGW Makefiles"
|
|
110
|
-
$env:CMAKE_ARGS = "-DLLAMA_OPENBLAS=on -DCMAKE_C_COMPILER=C:/w64devkit/bin/gcc.exe -DCMAKE_CXX_COMPILER=C:/w64devkit/bin/g++.exe"
|
|
130
|
+
$env:CMAKE_ARGS = "-DLLAMA_OPENBLAS=on -DCMAKE_C_COMPILER=C:/w64devkit/bin/gcc.exe -DCMAKE_CXX_COMPILER=C:/w64devkit/bin/g++.exe"
|
|
111
131
|
```
|
|
112
132
|
|
|
113
133
|
See the above instructions and set `CMAKE_ARGS` to the BLAS backend you want to use.
|
|
@@ -157,7 +177,7 @@ Below is a short example demonstrating how to use the high-level API to for basi
|
|
|
157
177
|
>>> from llama_cpp import Llama
|
|
158
178
|
>>> llm = Llama(
|
|
159
179
|
model_path="./models/7B/llama-model.gguf",
|
|
160
|
-
# n_gpu_layers=-1, # Uncomment to use GPU acceleration
|
|
180
|
+
# n_gpu_layers=-1, # Uncomment to use GPU acceleration
|
|
161
181
|
# seed=1337, # Uncomment to set a specific seed
|
|
162
182
|
# n_ctx=2048, # Uncomment to increase the context window
|
|
163
183
|
)
|
|
@@ -276,7 +296,6 @@ The high-level API also provides a simple interface for function calling.
|
|
|
276
296
|
Note that the only model that supports full function calling at this time is "functionary".
|
|
277
297
|
The gguf-converted files for this model can be found here: [functionary-7b-v1](https://huggingface.co/abetlen/functionary-7b-v1-GGUF)
|
|
278
298
|
|
|
279
|
-
|
|
280
299
|
```python
|
|
281
300
|
>>> from llama_cpp import Llama
|
|
282
301
|
>>> llm = Llama(model_path="path/to/functionary/llama-model.gguf", chat_format="functionary")
|
|
@@ -285,7 +304,7 @@ The gguf-converted files for this model can be found here: [functionary-7b-v1](h
|
|
|
285
304
|
{
|
|
286
305
|
"role": "system",
|
|
287
306
|
"content": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary"
|
|
288
|
-
|
|
307
|
+
|
|
289
308
|
},
|
|
290
309
|
{
|
|
291
310
|
"role": "user",
|
|
@@ -324,7 +343,6 @@ The gguf-converted files for this model can be found here: [functionary-7b-v1](h
|
|
|
324
343
|
|
|
325
344
|
### Multi-modal Models
|
|
326
345
|
|
|
327
|
-
|
|
328
346
|
`llama-cpp-python` supports the llava1.5 family of multi-modal models which allow the language model to
|
|
329
347
|
read information from both text and images.
|
|
330
348
|
|
|
@@ -360,6 +378,24 @@ Then you'll need to use a custom chat handler to load the clip model and process
|
|
|
360
378
|
)
|
|
361
379
|
```
|
|
362
380
|
|
|
381
|
+
### Speculative Decoding
|
|
382
|
+
|
|
383
|
+
`llama-cpp-python` supports speculative decoding which allows the model to generate completions based on a draft model.
|
|
384
|
+
|
|
385
|
+
The fastest way to use speculative decoding is through the `LlamaPromptLookupDecoding` class.
|
|
386
|
+
|
|
387
|
+
Just pass this as a draft model to the `Llama` class during initialization.
|
|
388
|
+
|
|
389
|
+
```python
|
|
390
|
+
from llama_cpp import Llama
|
|
391
|
+
from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
|
|
392
|
+
|
|
393
|
+
llama = Llama(
|
|
394
|
+
model_path="path/to/model.gguf",
|
|
395
|
+
draft_model=LlamaPromptLookupDecoding(num_pred_tokens=10) # num_pred_tokens is the number of tokens to predict; 10 is the default and generally good for GPU, while 2 performs better on CPU-only machines.
|
|
396
|
+
)
|
|
397
|
+
```
|
|
398
|
+
|
|
363
399
|
### Adjusting the Context Window
|
|
364
400
|
|
|
365
401
|
The context window of the Llama models determines the maximum number of tokens that can be processed at once. By default, this is set to 512 tokens, but can be adjusted based on your requirements.
|
|
@@ -370,7 +406,6 @@ For instance, if you want to work with larger contexts, you can expand the conte
|
|
|
370
406
|
llm = Llama(model_path="./models/7B/llama-model.gguf", n_ctx=2048)
|
|
371
407
|
```
|
|
372
408
|
|
|
373
|
-
|
|
374
409
|
## OpenAI Compatible Web Server
|
|
375
410
|
|
|
376
411
|
`llama-cpp-python` offers a web server which aims to act as a drop-in replacement for the OpenAI API.
|
|
@@ -418,7 +453,8 @@ A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python).
|
|
|
418
453
|
```bash
|
|
419
454
|
docker run --rm -it -p 8000:8000 -v /path/to/models:/models -e MODEL=/models/llama-model.gguf ghcr.io/abetlen/llama-cpp-python:latest
|
|
420
455
|
```
|
|
421
|
-
|
|
456
|
+
|
|
457
|
+
[Docker on termux (requires root)](https://gist.github.com/FreddieOliveira/efe850df7ff3951cb62d74bd770dce27) is currently the only known way to run this on phones; see the [termux support issue](https://github.com/abetlen/llama-cpp-python/issues/389).
|
|
422
458
|
|
|
423
459
|
## Low-level API
|
|
424
460
|
|
|
@@ -446,7 +482,6 @@ Below is a short example demonstrating how to use the low-level API to tokenize
|
|
|
446
482
|
|
|
447
483
|
Check out the [examples folder](examples/low_level_api) for more examples of using the low-level API.
|
|
448
484
|
|
|
449
|
-
|
|
450
485
|
## Documentation
|
|
451
486
|
|
|
452
487
|
Documentation is available via [https://llama-cpp-python.readthedocs.io/](https://llama-cpp-python.readthedocs.io/).
|
|
@@ -216,13 +216,13 @@ class _LlamaModel:
|
|
|
216
216
|
for i in range(llama_cpp.llama_model_meta_count(self.model)):
|
|
217
217
|
nbytes = llama_cpp.llama_model_meta_key_by_index(self.model, i, buffer, buffer_size)
|
|
218
218
|
if nbytes > buffer_size:
|
|
219
|
-
buffer_size = nbytes
|
|
219
|
+
buffer_size = nbytes + 1
|
|
220
220
|
buffer = ctypes.create_string_buffer(buffer_size)
|
|
221
221
|
nbytes = llama_cpp.llama_model_meta_key_by_index(self.model, i, buffer, buffer_size)
|
|
222
222
|
key = buffer.value.decode("utf-8")
|
|
223
223
|
nbytes = llama_cpp.llama_model_meta_val_str_by_index(self.model, i, buffer, buffer_size)
|
|
224
224
|
if nbytes > buffer_size:
|
|
225
|
-
buffer_size = nbytes
|
|
225
|
+
buffer_size = nbytes + 1
|
|
226
226
|
buffer = ctypes.create_string_buffer(buffer_size)
|
|
227
227
|
nbytes = llama_cpp.llama_model_meta_val_str_by_index(self.model, i, buffer, buffer_size)
|
|
228
228
|
value = buffer.value.decode("utf-8")
|