llama-cpp-python 0.2.24__tar.gz → 0.2.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (570) hide show
  1. llama_cpp_python-0.2.25/.git/FETCH_HEAD +1 -0
  2. llama_cpp_python-0.2.25/.git/HEAD +1 -0
  3. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/config +1 -1
  4. llama_cpp_python-0.2.25/.git/index +0 -0
  5. llama_cpp_python-0.2.25/.git/logs/HEAD +1 -0
  6. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/HEAD +1 -0
  7. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/config +1 -1
  8. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/index +0 -0
  9. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/logs/HEAD +2 -0
  10. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/logs/refs/heads/master +1 -0
  11. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +1 -0
  12. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/objects/pack/pack-53054ebe4aff53f3127333b245dfe71db566f164.idx +0 -0
  13. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/objects/pack/pack-b630c306381137c42fbadc02d9f2c158165b39b1.pack → llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/objects/pack/pack-53054ebe4aff53f3127333b245dfe71db566f164.pack +0 -0
  14. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/objects/pack/pack-53054ebe4aff53f3127333b245dfe71db566f164.rev +0 -0
  15. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/packed-refs +2 -0
  16. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/refs/heads/master +1 -0
  17. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/refs/tags/b1691 +1 -0
  18. llama_cpp_python-0.2.25/.git/modules/vendor/llama.cpp/shallow +1 -0
  19. llama_cpp_python-0.2.25/.git/objects/13/454a3a6bea90892a42064c32f7a1a60deb0806 +0 -0
  20. llama_cpp_python-0.2.25/.git/objects/1c/5efea21fad700ef81acb5682eb71efa64c7453 +0 -0
  21. llama_cpp_python-0.2.25/.git/objects/2b/14bc6783798c56c71db248c5a834c30fbbce21 +0 -0
  22. llama_cpp_python-0.2.25/.git/objects/34/6b4631ebd1f4af85e9988d4a528e00edba6375 +0 -0
  23. llama_cpp_python-0.2.25/.git/objects/37/556bf9c4f83f51e76682316ff4ea3aed58a279 +0 -0
  24. llama_cpp_python-0.2.25/.git/objects/64/b567b4f3142efeae284deeab2342122d7e62bd +0 -0
  25. llama_cpp_python-0.2.25/.git/objects/78/8732bd3ba7ed71b0e49fb2dfe42d4ed781c0eb +0 -0
  26. llama_cpp_python-0.2.25/.git/objects/7c/1dececc933fdfba00ee95b5bed81f447a21333 +0 -0
  27. llama_cpp_python-0.2.24/.git/objects/3d/07614e35e03d55630abf4e92857441fdcaf91f → llama_cpp_python-0.2.25/.git/objects/80/7b0f57a8a873e58ade0ff0f5b0bcf0ff66b7f9 +0 -0
  28. llama_cpp_python-0.2.25/.git/objects/85/21e7721390edb971bb04098cba2d50446b3d8f +0 -0
  29. llama_cpp_python-0.2.25/.git/objects/8e/32d2c0edce725a47b5845463133919cd766a61 +0 -0
  30. llama_cpp_python-0.2.25/.git/objects/97/fe6e180b574c24eb4f07ef229981a3ac478bb7 +0 -0
  31. llama_cpp_python-0.2.25/.git/objects/9d/3d3559849603efda6f3c8181684e4d19e0ec79 +0 -0
  32. llama_cpp_python-0.2.25/.git/objects/b9/373b7ac641e6e9c8d8cc64951139205d91d8bc +0 -0
  33. llama_cpp_python-0.2.25/.git/objects/c5/4e4eb5ce2636abd78df46a7616cfe9196a1198 +0 -0
  34. llama_cpp_python-0.2.25/.git/objects/c8/4fd04498c2fb188ff7c2a59473035fc90eb990 +0 -0
  35. llama_cpp_python-0.2.25/.git/objects/cd/351ba33849dcf6af35b493f7405962fa1625d4 +0 -0
  36. llama_cpp_python-0.2.25/.git/objects/f0/827d762e852a21f6406c469300899d5f509b8f +0 -0
  37. llama_cpp_python-0.2.25/.git/objects/fa/dfc5fb4fe6f5eb6d5d98b62519e374a5202b00 +0 -0
  38. llama_cpp_python-0.2.25/.git/refs/tags/v0.2.25 +1 -0
  39. llama_cpp_python-0.2.25/.git/shallow +1 -0
  40. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/CHANGELOG.md +9 -0
  41. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/PKG-INFO +4 -1
  42. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/README.md +3 -0
  43. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docs/server.md +95 -1
  44. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/llama_cpp/__init__.py +1 -1
  45. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/llama_cpp/llama.py +17 -9
  46. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/llama_cpp/llama_cpp.py +34 -19
  47. llama_cpp_python-0.2.25/llama_cpp/server/__main__.py +88 -0
  48. llama_cpp_python-0.2.25/llama_cpp/server/app.py +350 -0
  49. llama_cpp_python-0.2.25/llama_cpp/server/cli.py +97 -0
  50. llama_cpp_python-0.2.25/llama_cpp/server/errors.py +210 -0
  51. llama_cpp_python-0.2.25/llama_cpp/server/model.py +126 -0
  52. llama_cpp_python-0.2.25/llama_cpp/server/settings.py +167 -0
  53. llama_cpp_python-0.2.25/llama_cpp/server/types.py +266 -0
  54. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/docker.yml +32 -2
  55. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.gitignore +1 -0
  56. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/CMakeLists.txt +4 -0
  57. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/Makefile +28 -6
  58. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/README.md +21 -7
  59. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/build-info.cpp +1 -1
  60. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/common.cpp +1 -1
  61. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/common.h +2 -1
  62. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/CMakeLists.txt +1 -0
  63. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/baby-llama/baby-llama.cpp +3 -12
  64. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/export-lora/export-lora.cpp +1 -1
  65. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/finetune/finetune.cpp +20 -22
  66. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  67. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/gguf/gguf.cpp +0 -1
  68. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/clip.cpp +1 -7
  69. llama_cpp_python-0.2.25/vendor/llama.cpp/examples/lookup/CMakeLists.txt +5 -0
  70. llama_cpp_python-0.2.25/vendor/llama.cpp/examples/lookup/README.md +13 -0
  71. llama_cpp_python-0.2.25/vendor/llama.cpp/examples/lookup/lookup.cpp +230 -0
  72. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +5 -9
  73. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-alloc.c +13 -5
  74. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-backend-impl.h +12 -8
  75. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-backend.c +75 -5
  76. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-backend.h +7 -0
  77. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-cuda.cu +214 -129
  78. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-metal.h +3 -0
  79. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-metal.m +190 -44
  80. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-quants.c +2 -2
  81. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml.c +29 -41
  82. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml.h +12 -10
  83. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/README.md +1 -1
  84. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/vocab.py +1 -1
  85. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/llama.cpp +624 -747
  86. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/llama.h +7 -3
  87. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-backend-ops.cpp +5 -4
  88. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-grad0.cpp +5 -5
  89. llama_cpp_python-0.2.24/.git/FETCH_HEAD +0 -1
  90. llama_cpp_python-0.2.24/.git/HEAD +0 -1
  91. llama_cpp_python-0.2.24/.git/index +0 -0
  92. llama_cpp_python-0.2.24/.git/logs/HEAD +0 -1
  93. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/HEAD +0 -1
  94. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/index +0 -0
  95. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/logs/HEAD +0 -2
  96. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/logs/refs/heads/master +0 -1
  97. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD +0 -1
  98. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/objects/pack/pack-b630c306381137c42fbadc02d9f2c158165b39b1.idx +0 -0
  99. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/objects/pack/pack-b630c306381137c42fbadc02d9f2c158165b39b1.rev +0 -0
  100. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/packed-refs +0 -2
  101. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/refs/heads/master +0 -1
  102. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/refs/tags/b1660 +0 -1
  103. llama_cpp_python-0.2.24/.git/modules/vendor/llama.cpp/shallow +0 -1
  104. llama_cpp_python-0.2.24/.git/objects/00/031ba49010ec116b38b81ec4f6902d35e44313 +0 -0
  105. llama_cpp_python-0.2.24/.git/objects/1c/e1f0b7c1c9711fd12aa10073b82dc0a156a384 +0 -0
  106. llama_cpp_python-0.2.24/.git/objects/2f/413db166489b25b74d502e2acc4f6c54269247 +0 -0
  107. llama_cpp_python-0.2.24/.git/objects/45/fc5a8579674a501e1772722b3e1e2c775dd01f +0 -0
  108. llama_cpp_python-0.2.24/.git/objects/4e/1e5622839e64c024cfbfb9beb903d8ed75849f +0 -0
  109. llama_cpp_python-0.2.24/.git/objects/4f/341a9a49ea26463a218bb3f3b178ba5c7495be +0 -0
  110. llama_cpp_python-0.2.24/.git/objects/50/df3282f98d691e83e44ef9eca34a9f01f3a769 +0 -0
  111. llama_cpp_python-0.2.24/.git/objects/53/8e3ff160a5b155620234989336931e2e9f35b7 +0 -0
  112. llama_cpp_python-0.2.24/.git/objects/5c/f12f19f63a787b5ca00ef8ed698397dd6bfdbf +0 -0
  113. llama_cpp_python-0.2.24/.git/objects/5e/863d8a3bf9384f5574166385d63b6896d929a6 +0 -1
  114. llama_cpp_python-0.2.24/.git/objects/a8/62a27c09eba0394770472bc9c6e7f6ea6243df +0 -0
  115. llama_cpp_python-0.2.24/.git/objects/f1/698ab834de40dfe365a8c198f0349a960ec4c7 +0 -0
  116. llama_cpp_python-0.2.24/.git/objects/fa/39047f8f65be84af6bd5ebad4f976912d3c4e0 +0 -0
  117. llama_cpp_python-0.2.24/.git/refs/tags/v0.2.24 +0 -1
  118. llama_cpp_python-0.2.24/.git/shallow +0 -1
  119. llama_cpp_python-0.2.24/llama_cpp/server/__main__.py +0 -101
  120. llama_cpp_python-0.2.24/llama_cpp/server/app.py +0 -912
  121. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.dockerignore +0 -0
  122. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/description +0 -0
  123. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/applypatch-msg.sample +0 -0
  124. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/commit-msg.sample +0 -0
  125. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/fsmonitor-watchman.sample +0 -0
  126. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/post-update.sample +0 -0
  127. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/pre-applypatch.sample +0 -0
  128. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/pre-commit.sample +0 -0
  129. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/pre-merge-commit.sample +0 -0
  130. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/pre-push.sample +0 -0
  131. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/pre-rebase.sample +0 -0
  132. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/pre-receive.sample +0 -0
  133. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/prepare-commit-msg.sample +0 -0
  134. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/push-to-checkout.sample +0 -0
  135. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/sendemail-validate.sample +0 -0
  136. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/hooks/update.sample +0 -0
  137. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/info/exclude +0 -0
  138. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/description +0 -0
  139. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/applypatch-msg.sample +0 -0
  140. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/commit-msg.sample +0 -0
  141. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/fsmonitor-watchman.sample +0 -0
  142. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/post-update.sample +0 -0
  143. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-applypatch.sample +0 -0
  144. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-commit.sample +0 -0
  145. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-merge-commit.sample +0 -0
  146. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-push.sample +0 -0
  147. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-rebase.sample +0 -0
  148. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/pre-receive.sample +0 -0
  149. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/prepare-commit-msg.sample +0 -0
  150. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/push-to-checkout.sample +0 -0
  151. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/sendemail-validate.sample +0 -0
  152. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/hooks/update.sample +0 -0
  153. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/info/exclude +0 -0
  154. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/modules/vendor/llama.cpp/refs/remotes/origin/HEAD +0 -0
  155. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/03/7f96a2dde3c50a29a26eaacf577f4ecac7c027 +0 -0
  156. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/04/5856ea2ffe697ec33db1c1c989bd45cde5bb3d +0 -0
  157. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/04/7bc14424303575f73af90611fec827334f54e8 +0 -0
  158. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/0e/2a907c0e40de1a432ee0cd2e3d01c634df1a37 +0 -0
  159. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/0e/f132b07175867c07ad06fa22ca6b95eca67b59 +0 -0
  160. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/11/2a0f7872fa9244bf38729a2722dc5c08dec20c +0 -0
  161. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/11/b6d5c26e3f7157658952b8ec353e985d522fac +0 -0
  162. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/13/c5b6b0df5f67e80cbe584909b83777901265a1 +0 -0
  163. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/17/1f357f53b0de535157a2c9b4de04582784ec97 +0 -0
  164. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/19/9bd4ffbf88c68c98b52c05f388dfa92716f6b7 +0 -0
  165. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/23/e37d4d40e5ec0bfd85b5e928834d58e2cf0da6 +0 -0
  166. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/24/04228325d88c59b819acaedb20b15635f75df9 +0 -0
  167. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/27/2e4767b47397d1529e6a2d01298144d9d746ba +0 -0
  168. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/27/a6b1e5042318f85dc99c9f24f4dd6fc472d242 +0 -0
  169. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/29/03e0146d304bcacbfabfe71f171a2edc03043e +0 -0
  170. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/2c/c6fb02dcb3226c1ecf0fbed6453eb3acadeaa2 +0 -0
  171. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/2e/24903c21db75253911a70039ac5622672db813 +0 -0
  172. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/39/697bfc2538bd4558018d0d721d6d6028c4bb56 +0 -0
  173. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/3a/1d7180d508818fe957923e00dcd8950938632d +0 -0
  174. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/3a/6457dcdfd47e764654bacae0ba8347976b645a +0 -0
  175. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/41/cc68ea2402cf682807649d841e7c0f4175db01 +0 -0
  176. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/44/b6d4a35d3caf0c65aef0a1c77ff0ab4077d405 +0 -0
  177. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/47/4503fdfc554d8caabee4f321a80427f8c7d696 +0 -0
  178. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/4b/3189dd1a54be3bc416786ddf184dd047dc4b20 +0 -0
  179. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/51/f357200f8b998031f4be924e11ed2ae4bf3fea +0 -0
  180. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/55/d08db5f514fb6847938e3d6489b99f737ba6e3 +0 -0
  181. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/56/2410fe1a4c4093d62e1705933eaf66c6d99a2c +0 -0
  182. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/5b/51e98ce432974ff031367f8937babe755e3d73 +0 -0
  183. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/5d/f12aaf53a0e85f55e1aa0e5167bc831ab32783 +0 -0
  184. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/5f/350ffe99abe7297b2839f36fd0eaeb98887805 +0 -0
  185. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/60/bc7aef42aac0409cfdca666ad2ff6f516d7b5b +0 -0
  186. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/61/027ef99725c50b0891fdbf0bf263a33abe648f +0 -0
  187. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/68/7316b327ca038d26a338b3070a7d4698322515 +0 -0
  188. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/74/7c6130e3cb1479d20e2013b1dd3db3379c2266 +0 -0
  189. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/76/e26fbd106895fba52861f8ac1e11cc6ee2a307 +0 -0
  190. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/79/5dad726ae91f330c56e49bc188080b5b3b5ba9 +0 -0
  191. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/79/a9e67a1aee09c6d182f240ba5eef32feabcbce +0 -0
  192. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/7a/7b899ec7dd48d192cba14ac9c8e9df4233f7fb +0 -0
  193. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/7d/5498f9d2c49c1f731b47845d845f6c5e16a3d9 +0 -0
  194. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/7d/6c970483161eaf43cfa9d50010c071d4953053 +0 -0
  195. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/7e/df0975dc12ccc95ad14de085f07efe6d65c620 +0 -0
  196. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/7e/e8f748eab47180cea09c0ad8e75c3b991b4af4 +0 -0
  197. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/82/e5c4487e57d6d59c901a73bdd2a9bc172fee7c +0 -0
  198. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/8b/d03f88a1895cbf3ef249e13df79ee0efda779d +0 -0
  199. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/90/fdf9b2be7bbfb62c2b1463d6c95cc2d26ebb5c +0 -0
  200. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/91/abb11fdf507883caeeb2d2958e1c65fb6cbdc1 +0 -0
  201. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/99/32d6130f5552a9b85c8b15b4ac6bc26b1068ce +0 -0
  202. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/9d/0ec2f705618e591cfa8d6512cb9a96b3da75f1 +0 -0
  203. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/a9/e51cdc1672134ec9af66c9eccf09f6da4ceccd +0 -0
  204. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/ac/82b8fbb81087ec9b3a72d9e377102a31b28d16 +0 -0
  205. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/ae/c023cd61b3bb83bac968fffdd6aa0810af7c3d +0 -0
  206. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/af/f397f476fb7773d0e89b0e8913c8b1f97ca3e4 +0 -0
  207. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/b1/f90b91594f496ad9f27b1a68584984f4b523d2 +0 -0
  208. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/b5/affaa9d6087f3888dd9eedea209bb214b6e135 +0 -0
  209. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/b9/1632f5bc787c1404600c894a6a4126359747d8 +0 -0
  210. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/bb/cbbe7d61558adde3cbfd0c7a63a67c27ed6d30 +0 -0
  211. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/c8/5e73d2b657bb05ed99309615d67bac93d9f86e +0 -0
  212. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/ca/e7ebb7a833dafcd402a96bea3a9574f74f0ed5 +0 -0
  213. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/ca/ebbb67fdb02a0a8897d4e4826ea046a9931f6f +0 -0
  214. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/cc/6a3a7252ea6e698614f0629d4bc040ab6ca717 +0 -0
  215. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/d3/329eec3bac6ce7e54c76b77ac9bf99fab0fe3f +0 -0
  216. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/da/c33b74dddf06fcfc01244044eebb102cfcea37 +0 -0
  217. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/e2/13518b95011cb6ee783986624c3b6de8659f81 +0 -0
  218. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/e4/1f375774e6945e445bfb179502b128fe22dda7 +0 -0
  219. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
  220. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/e9/30609ff5c479a1d5e12a8f3993582d421a6326 +0 -0
  221. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/e9/48f8deb150039c6853e14537a8dd0cc9002b72 +0 -0
  222. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/eb/9a2cfa9167df02f136502af79738c71363abfd +0 -0
  223. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/ef/1b2c0162e8edd321e2b9c1ce375d96f1f1d048 +0 -0
  224. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/f1/b8e9d154231932c4b7b9b59611626764e68632 +0 -0
  225. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/f2/0813765a70679e8a063871c9ef75d75c65ccb6 +0 -0
  226. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/fc/25ff5160028dee3570249abc40cd57780bcca9 +0 -0
  227. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/fc/ef8cd800ee8a265b146748d178cb56b5632bf3 +0 -0
  228. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/fd/64c09b37947c97e58903ce570785c657d56722 +0 -0
  229. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/fe/b0ed68d94eac48b844fd587ddfb808649716a1 +0 -0
  230. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.git/objects/ff/3e950cd1110fe552912cea4c268c4023d2b737 +0 -0
  231. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  232. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  233. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.github/dependabot.yml +0 -0
  234. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.github/workflows/build-and-release.yaml +0 -0
  235. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.github/workflows/build-docker.yaml +0 -0
  236. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.github/workflows/publish-to-test.yaml +0 -0
  237. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.github/workflows/publish.yaml +0 -0
  238. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.github/workflows/test-pypi.yaml +0 -0
  239. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.github/workflows/test.yaml +0 -0
  240. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.gitignore +0 -0
  241. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.gitmodules +0 -0
  242. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/.readthedocs.yaml +0 -0
  243. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/CMakeLists.txt +0 -0
  244. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/LICENSE.md +0 -0
  245. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/Makefile +0 -0
  246. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docker/README.md +0 -0
  247. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docker/cuda_simple/Dockerfile +0 -0
  248. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docker/open_llama/Dockerfile +0 -0
  249. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docker/open_llama/build.sh +0 -0
  250. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docker/open_llama/hug_model.py +0 -0
  251. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docker/open_llama/start.sh +0 -0
  252. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docker/open_llama/start_server.sh +0 -0
  253. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docker/openblas_simple/Dockerfile +0 -0
  254. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docker/simple/Dockerfile +0 -0
  255. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docker/simple/run.sh +0 -0
  256. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docs/api-reference.md +0 -0
  257. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docs/changelog.md +0 -0
  258. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docs/index.md +0 -0
  259. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docs/install/macos.md +0 -0
  260. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/docs/requirements.txt +0 -0
  261. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/high_level_api/fastapi_server.py +0 -0
  262. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/high_level_api/high_level_api_embedding.py +0 -0
  263. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/high_level_api/high_level_api_inference.py +0 -0
  264. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/high_level_api/high_level_api_streaming.py +0 -0
  265. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/high_level_api/langchain_custom_llm.py +0 -0
  266. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/low_level_api/Chat.py +0 -0
  267. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/low_level_api/Miku.py +0 -0
  268. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/low_level_api/ReasonAct.py +0 -0
  269. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/low_level_api/common.py +0 -0
  270. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/low_level_api/low_level_api_chat_cpp.py +0 -0
  271. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/low_level_api/low_level_api_llama_cpp.py +0 -0
  272. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/low_level_api/quantize.py +0 -0
  273. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/low_level_api/readme/low_level_api_llama_cpp.md +0 -0
  274. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/low_level_api/util.py +0 -0
  275. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/notebooks/Batching.ipynb +0 -0
  276. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/notebooks/Clients.ipynb +0 -0
  277. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/notebooks/Functions.ipynb +0 -0
  278. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/notebooks/Guidance.ipynb +0 -0
  279. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/notebooks/Multimodal.ipynb +0 -0
  280. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/examples/notebooks/PerformanceTuning.ipynb +0 -0
  281. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/llama_cpp/_utils.py +0 -0
  282. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/llama_cpp/llama_chat_format.py +0 -0
  283. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/llama_cpp/llama_grammar.py +0 -0
  284. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/llama_cpp/llama_types.py +0 -0
  285. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/llama_cpp/llava_cpp.py +0 -0
  286. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/llama_cpp/py.typed +0 -0
  287. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/llama_cpp/server/__init__.py +0 -0
  288. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/mkdocs.yml +0 -0
  289. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/pyproject.toml +0 -0
  290. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/tests/test_grammar.py +0 -0
  291. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/tests/test_llama.py +0 -0
  292. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.clang-tidy +0 -0
  293. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/cloud-v-pipeline +0 -0
  294. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/full-cuda.Dockerfile +0 -0
  295. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/full-rocm.Dockerfile +0 -0
  296. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
  297. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/main-cuda.Dockerfile +0 -0
  298. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/main-rocm.Dockerfile +0 -0
  299. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
  300. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.devops/tools.sh +0 -0
  301. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.dockerignore +0 -0
  302. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.ecrc +0 -0
  303. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.editorconfig +0 -0
  304. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.flake8 +0 -0
  305. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.git +0 -0
  306. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md +0 -0
  307. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/enhancement.md +0 -0
  308. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/build.yml +0 -0
  309. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/code-coverage.yml +0 -0
  310. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/editorconfig.yml +0 -0
  311. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/gguf-publish.yml +0 -0
  312. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/python-lint.yml +0 -0
  313. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/tidy-post.yml +0 -0
  314. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/tidy-review.yml +0 -0
  315. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.github/workflows/zig-build.yml +0 -0
  316. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/.pre-commit-config.yaml +0 -0
  317. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/LICENSE +0 -0
  318. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/Package.swift +0 -0
  319. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/SHA256SUMS +0 -0
  320. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/build.zig +0 -0
  321. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ci/README.md +0 -0
  322. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ci/run.sh +0 -0
  323. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/cmake/FindSIMD.cmake +0 -0
  324. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/codecov.yml +0 -0
  325. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/CMakeLists.txt +0 -0
  326. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/base64.hpp +0 -0
  327. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/build-info.cpp.in +0 -0
  328. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/console.cpp +0 -0
  329. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/console.h +0 -0
  330. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/grammar-parser.cpp +0 -0
  331. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/grammar-parser.h +0 -0
  332. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/log.h +0 -0
  333. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/sampling.cpp +0 -0
  334. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/sampling.h +0 -0
  335. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/stb_image.h +0 -0
  336. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/train.cpp +0 -0
  337. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/common/train.h +0 -0
  338. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/convert-hf-to-gguf.py +0 -0
  339. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/convert-llama-ggml-to-gguf.py +0 -0
  340. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/convert-lora-to-ggml.py +0 -0
  341. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/convert-persimmon-to-gguf.py +0 -0
  342. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/convert.py +0 -0
  343. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/docs/BLIS.md +0 -0
  344. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/docs/llama-star/idea-arch.key +0 -0
  345. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/docs/llama-star/idea-arch.pdf +0 -0
  346. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/docs/token_generation_performance_tips.md +0 -0
  347. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/Miku.sh +0 -0
  348. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/alpaca.sh +0 -0
  349. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -0
  350. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched/CMakeLists.txt +0 -0
  351. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched/README.md +0 -0
  352. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched/batched.cpp +0 -0
  353. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched-bench/CMakeLists.txt +0 -0
  354. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched-bench/README.md +0 -0
  355. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched-bench/batched-bench.cpp +0 -0
  356. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched.swift/.gitignore +0 -0
  357. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched.swift/Makefile +0 -0
  358. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched.swift/Package.swift +0 -0
  359. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched.swift/README.md +0 -0
  360. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/batched.swift/Sources/main.swift +0 -0
  361. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/beam-search/CMakeLists.txt +0 -0
  362. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/beam-search/beam-search.cpp +0 -0
  363. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/benchmark/CMakeLists.txt +0 -0
  364. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -0
  365. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/chat-13B.bat +0 -0
  366. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/chat-13B.sh +0 -0
  367. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/chat-persistent.sh +0 -0
  368. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/chat-vicuna.sh +0 -0
  369. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/chat.sh +0 -0
  370. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -0
  371. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/README.md +0 -0
  372. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -0
  373. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/embedding/CMakeLists.txt +0 -0
  374. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/embedding/README.md +0 -0
  375. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/embedding/embedding.cpp +0 -0
  376. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/export-lora/CMakeLists.txt +0 -0
  377. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/export-lora/README.md +0 -0
  378. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/finetune/CMakeLists.txt +0 -0
  379. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/finetune/README.md +0 -0
  380. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/finetune/convert-finetune-checkpoint-to-gguf.py +0 -0
  381. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/finetune/finetune.sh +0 -0
  382. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/gpt4all.sh +0 -0
  383. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/infill/CMakeLists.txt +0 -0
  384. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/infill/README.md +0 -0
  385. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/infill/infill.cpp +0 -0
  386. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/jeopardy/README.md +0 -0
  387. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/jeopardy/graph.py +0 -0
  388. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/jeopardy/jeopardy.sh +0 -0
  389. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/jeopardy/qasheet.csv +0 -0
  390. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/jeopardy/questions.txt +0 -0
  391. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/json-schema-to-grammar.py +0 -0
  392. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama-bench/CMakeLists.txt +0 -0
  393. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama-bench/README.md +0 -0
  394. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama-bench/llama-bench.cpp +0 -0
  395. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/.gitignore +0 -0
  396. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/README.md +0 -0
  397. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +0 -0
  398. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.cpp.swift/bridging-header.h +0 -0
  399. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AccentColor.colorset/Contents.json +0 -0
  400. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json +0 -0
  401. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json +0 -0
  402. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift +0 -0
  403. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/Preview Content/Preview Assets.xcassets/Contents.json +0 -0
  404. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift +0 -0
  405. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift +0 -0
  406. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift +0 -0
  407. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +0 -0
  408. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -0
  409. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -0
  410. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama.vim +0 -0
  411. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama2-13b.sh +0 -0
  412. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llama2.sh +0 -0
  413. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/CMakeLists.txt +0 -0
  414. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/README.md +0 -0
  415. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/clip.h +0 -0
  416. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/convert-image-encoder-to-gguf.py +0 -0
  417. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/llava-cli.cpp +0 -0
  418. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/llava-surgery.py +0 -0
  419. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/llava.cpp +0 -0
  420. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llava/llava.h +0 -0
  421. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/llm.vim +0 -0
  422. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/lookahead/CMakeLists.txt +0 -0
  423. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/lookahead/README.md +0 -0
  424. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/lookahead/lookahead.cpp +0 -0
  425. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main/CMakeLists.txt +0 -0
  426. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main/README.md +0 -0
  427. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main/main.cpp +0 -0
  428. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main-cmake-pkg/.gitignore +0 -0
  429. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -0
  430. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/main-cmake-pkg/README.md +0 -0
  431. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/make-ggml.py +0 -0
  432. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/metal/CMakeLists.txt +0 -0
  433. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/metal/metal.cpp +0 -0
  434. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/parallel/CMakeLists.txt +0 -0
  435. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/parallel/README.md +0 -0
  436. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/parallel/parallel.cpp +0 -0
  437. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/perplexity/CMakeLists.txt +0 -0
  438. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/perplexity/README.md +0 -0
  439. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/perplexity/perplexity.cpp +0 -0
  440. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/quantize/CMakeLists.txt +0 -0
  441. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/quantize/README.md +0 -0
  442. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/quantize/quantize.cpp +0 -0
  443. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -0
  444. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/quantize-stats/quantize-stats.cpp +0 -0
  445. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/reason-act.sh +0 -0
  446. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -0
  447. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -0
  448. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/CMakeLists.txt +0 -0
  449. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/README.md +0 -0
  450. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/api_like_OAI.py +0 -0
  451. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/chat-llama2.sh +0 -0
  452. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/chat.mjs +0 -0
  453. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/chat.sh +0 -0
  454. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/completion.js.hpp +0 -0
  455. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/deps.sh +0 -0
  456. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/httplib.h +0 -0
  457. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/index.html.hpp +0 -0
  458. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/index.js.hpp +0 -0
  459. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/json-schema-to-grammar.mjs.hpp +0 -0
  460. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/json.hpp +0 -0
  461. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/public/completion.js +0 -0
  462. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/public/index.html +0 -0
  463. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/public/index.js +0 -0
  464. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/public/json-schema-to-grammar.mjs +0 -0
  465. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server/server.cpp +0 -0
  466. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/server-llama2-13B.sh +0 -0
  467. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/simple/CMakeLists.txt +0 -0
  468. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/simple/README.md +0 -0
  469. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/simple/simple.cpp +0 -0
  470. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/speculative/CMakeLists.txt +0 -0
  471. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/speculative/README.md +0 -0
  472. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/speculative/speculative.cpp +0 -0
  473. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/tokenize/CMakeLists.txt +0 -0
  474. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/tokenize/tokenize.cpp +0 -0
  475. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -0
  476. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/train-text-from-scratch/README.md +0 -0
  477. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py +0 -0
  478. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/flake.lock +0 -0
  479. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/flake.nix +0 -0
  480. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-alloc.h +0 -0
  481. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-cuda.h +0 -0
  482. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-impl.h +0 -0
  483. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-metal.metal +0 -0
  484. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-mpi.c +0 -0
  485. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-mpi.h +0 -0
  486. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-opencl.cpp +0 -0
  487. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-opencl.h +0 -0
  488. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/ggml-quants.h +0 -0
  489. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/LICENSE +0 -0
  490. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/examples/writer.py +0 -0
  491. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/__init__.py +0 -0
  492. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/constants.py +0 -0
  493. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/gguf.py +0 -0
  494. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/gguf_reader.py +0 -0
  495. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/gguf_writer.py +0 -0
  496. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/py.typed +0 -0
  497. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/gguf/tensor_mapping.py +0 -0
  498. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/pyproject.toml +0 -0
  499. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/scripts/__init__.py +0 -0
  500. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/scripts/gguf-convert-endian.py +0 -0
  501. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/scripts/gguf-dump.py +0 -0
  502. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/scripts/gguf-set-metadata.py +0 -0
  503. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/gguf-py/tests/test_gguf.py +0 -0
  504. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/README.md +0 -0
  505. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/arithmetic.gbnf +0 -0
  506. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/c.gbnf +0 -0
  507. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/chess.gbnf +0 -0
  508. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/japanese.gbnf +0 -0
  509. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/json.gbnf +0 -0
  510. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/json_arr.gbnf +0 -0
  511. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/grammars/list.gbnf +0 -0
  512. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/media/llama-leader.jpeg +0 -0
  513. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/media/llama0-banner.png +0 -0
  514. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/media/llama0-logo.png +0 -0
  515. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/media/llama1-banner.png +0 -0
  516. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/media/llama1-logo.png +0 -0
  517. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/mypy.ini +0 -0
  518. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/pocs/CMakeLists.txt +0 -0
  519. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/pocs/vdot/CMakeLists.txt +0 -0
  520. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/pocs/vdot/q8dot.cpp +0 -0
  521. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/pocs/vdot/vdot.cpp +0 -0
  522. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/LLM-questions.txt +0 -0
  523. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
  524. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/assistant.txt +0 -0
  525. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat-with-baichuan.txt +0 -0
  526. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
  527. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat-with-qwen.txt +0 -0
  528. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -0
  529. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -0
  530. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/chat.txt +0 -0
  531. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/dan-modified.txt +0 -0
  532. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/dan.txt +0 -0
  533. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/mnemonics.txt +0 -0
  534. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/parallel-questions.txt +0 -0
  535. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/prompts/reason-act.txt +0 -0
  536. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/requirements-hf-to-gguf.txt +0 -0
  537. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/requirements.txt +0 -0
  538. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/run_with_preset.py +0 -0
  539. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/LlamaConfig.cmake.in +0 -0
  540. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/build-info.cmake +0 -0
  541. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/build-info.sh +0 -0
  542. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/convert-gg.sh +0 -0
  543. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -0
  544. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/get-flags.mk +0 -0
  545. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/get-wikitext-2.sh +0 -0
  546. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/qnt-all.sh +0 -0
  547. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/run-all-perf.sh +0 -0
  548. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/run-all-ppl.sh +0 -0
  549. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/server-llm.sh +0 -0
  550. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/sync-ggml.sh +0 -0
  551. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/scripts/verify-checksum-models.py +0 -0
  552. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/spm-headers/ggml.h +0 -0
  553. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/spm-headers/llama.h +0 -0
  554. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/CMakeLists.txt +0 -0
  555. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-c.c +0 -0
  556. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-double-float.cpp +0 -0
  557. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-grammar-parser.cpp +0 -0
  558. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-llama-grammar.cpp +0 -0
  559. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-opt.cpp +0 -0
  560. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-quantize-fns.cpp +0 -0
  561. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-quantize-perf.cpp +0 -0
  562. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-rope.cpp +0 -0
  563. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-sampling.cpp +0 -0
  564. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -0
  565. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-0-falcon.py +0 -0
  566. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -0
  567. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-0-llama.py +0 -0
  568. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -0
  569. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/tests/test-tokenizer-1-llama.cpp +0 -0
  570. {llama_cpp_python-0.2.24 → llama_cpp_python-0.2.25}/vendor/llama.cpp/unicode.h +0 -0
@@ -0,0 +1 @@
1
+ 37556bf9c4f83f51e76682316ff4ea3aed58a279 '37556bf9c4f83f51e76682316ff4ea3aed58a279' of https://github.com/abetlen/llama-cpp-python
@@ -0,0 +1 @@
1
+ 37556bf9c4f83f51e76682316ff4ea3aed58a279
@@ -9,7 +9,7 @@
9
9
  [gc]
10
10
  auto = 0
11
11
  [http "https://github.com/"]
12
- extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX3JrYlR4S25WQUV4a0N2bEZ4QVF0cVR6dlo0VTM5eDNENGxubg==
12
+ extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzXzMzOEY0bUxVSzA1UlkwTkhqNDgzaXlqZW8xRVBxMDFBQ1JITQ==
13
13
  [submodule "vendor/llama.cpp"]
14
14
  active = true
15
15
  url = https://github.com/ggerganov/llama.cpp.git
Binary file
@@ -0,0 +1 @@
1
+ 0000000000000000000000000000000000000000 37556bf9c4f83f51e76682316ff4ea3aed58a279 runner <runner@fv-az1149-712.p0yfcspwqgdenibuqhnf5ysfnc.dx.internal.cloudapp.net> 1703276562 +0000 checkout: moving from master to refs/tags/v0.2.25
@@ -0,0 +1 @@
1
+ 7082d24cec35e9ce9147535a2224dfc67ee0a78c
@@ -13,7 +13,7 @@
13
13
  [gc]
14
14
  auto = 0
15
15
  [http "https://github.com/"]
16
- extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX3JrYlR4S25WQUV4a0N2bEZ4QVF0cVR6dlo0VTM5eDNENGxubg==
16
+ extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzXzMzOEY0bUxVSzA1UlkwTkhqNDgzaXlqZW8xRVBxMDFBQ1JITQ==
17
17
  [url "https://github.com/"]
18
18
  insteadOf = git@github.com:
19
19
  insteadOf = org-6826477@github.com:
@@ -0,0 +1,2 @@
1
+ 0000000000000000000000000000000000000000 7082d24cec35e9ce9147535a2224dfc67ee0a78c runner <runner@fv-az1149-712.p0yfcspwqgdenibuqhnf5ysfnc.dx.internal.cloudapp.net> 1703276563 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
2
+ 7082d24cec35e9ce9147535a2224dfc67ee0a78c 7082d24cec35e9ce9147535a2224dfc67ee0a78c runner <runner@fv-az1149-712.p0yfcspwqgdenibuqhnf5ysfnc.dx.internal.cloudapp.net> 1703276563 +0000 checkout: moving from master to 7082d24cec35e9ce9147535a2224dfc67ee0a78c
@@ -0,0 +1 @@
1
+ 0000000000000000000000000000000000000000 7082d24cec35e9ce9147535a2224dfc67ee0a78c runner <runner@fv-az1149-712.p0yfcspwqgdenibuqhnf5ysfnc.dx.internal.cloudapp.net> 1703276563 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
@@ -0,0 +1 @@
1
+ 0000000000000000000000000000000000000000 7082d24cec35e9ce9147535a2224dfc67ee0a78c runner <runner@fv-az1149-712.p0yfcspwqgdenibuqhnf5ysfnc.dx.internal.cloudapp.net> 1703276563 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
@@ -0,0 +1,2 @@
1
+ # pack-refs with: peeled fully-peeled sorted
2
+ 7082d24cec35e9ce9147535a2224dfc67ee0a78c refs/remotes/origin/master
@@ -0,0 +1 @@
1
+ 7082d24cec35e9ce9147535a2224dfc67ee0a78c
@@ -0,0 +1 @@
1
+ 7082d24cec35e9ce9147535a2224dfc67ee0a78c
@@ -0,0 +1 @@
1
+ 7082d24cec35e9ce9147535a2224dfc67ee0a78c
@@ -0,0 +1 @@
1
+ 37556bf9c4f83f51e76682316ff4ea3aed58a279
@@ -0,0 +1 @@
1
+ 37556bf9c4f83f51e76682316ff4ea3aed58a279
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.2.25]
11
+
12
+ - feat(server): Multi model support by @D4ve-R in #931
13
+ - feat(server): Support none defaulting to infinity for completions by @swg in #111
14
+ - feat(server): Implement openai api compatible authentication by @docmeth2 in #1010
15
+ - fix: text_offset of multi-token characters by @twaka in #1037
16
+ - fix: ctypes bindings for kv override by @phiharri in #1011
17
+ - fix: ctypes definitions of llama_kv_cache_view_update and llama_kv_cache_view_free. by @e-c-d in #1028
18
+
10
19
  ## [0.2.24]
11
20
 
12
21
  - feat: Update llama.cpp to ggerganov/llama.cpp@0e18b2e7d0b5c0a509ea40098def234b8d4a938a
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama_cpp_python
3
- Version: 0.2.24
3
+ Version: 0.2.25
4
4
  Summary: Python bindings for the llama.cpp library
5
5
  Author-Email: Andrei Betlen <abetlen@gmail.com>
6
6
  License: MIT
@@ -60,11 +60,13 @@ This package provides:
60
60
  - [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
61
61
  - [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
62
62
  - [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
63
+ - [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
63
64
 
64
65
  Documentation is available at [https://llama-cpp-python.readthedocs.io/en/latest](https://llama-cpp-python.readthedocs.io/en/latest).
65
66
 
66
67
 
67
68
 
69
+
68
70
  ## Installation
69
71
 
70
72
  `llama-cpp-python` can be installed directly from PyPI as a source distribution by running:
@@ -374,6 +376,7 @@ For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_
374
376
  - [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
375
377
  - [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
376
378
  - [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
379
+ - [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
377
380
 
378
381
  ## Docker image
379
382
 
@@ -18,11 +18,13 @@ This package provides:
18
18
  - [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
19
19
  - [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
20
20
  - [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
21
+ - [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
21
22
 
22
23
  Documentation is available at [https://llama-cpp-python.readthedocs.io/en/latest](https://llama-cpp-python.readthedocs.io/en/latest).
23
24
 
24
25
 
25
26
 
27
+
26
28
  ## Installation
27
29
 
28
30
  `llama-cpp-python` can be installed directly from PyPI as a source distribution by running:
@@ -332,6 +334,7 @@ For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_
332
334
  - [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
333
335
  - [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)
334
336
  - [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models)
337
+ - [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support)
335
338
 
336
339
  ## Docker image
337
340
 
@@ -32,6 +32,12 @@ python3 -m llama_cpp.server --help
32
32
 
33
33
  NOTE: All server options are also available as environment variables. For example, `--model` can be set by setting the `MODEL` environment variable.
34
34
 
35
+ Check out the server options reference below for more information on the available settings.
36
+ CLI arguments and environment variables are available for all of the fields defined in [`ServerSettings`](#llama_cpp.server.settings.ServerSettings) and [`ModelSettings`](#llama_cpp.server.settings.ModelSettings).
37
+
38
+ Additionally, the server supports configuration via a config file; check out the [configuration section](#configuration-and-multi-model-support) for more information and examples.
39
+
40
+
35
41
  ## Guides
36
42
 
37
43
  ### Code Completion
@@ -121,4 +127,92 @@ response = client.chat.completions.create(
121
127
  ],
122
128
  )
123
129
  print(response)
124
- ```
130
+ ```
131
+
132
+ ## Configuration and Multi-Model Support
133
+
134
+ The server supports configuration via a JSON config file that can be passed using the `--config_file` parameter or the `CONFIG_FILE` environment variable.
135
+
136
+ ```bash
137
+ python3 -m llama_cpp.server --config_file <config_file>
138
+ ```
139
+
140
+ Config files support all of the server and model options supported by the CLI and environment variables; however, instead of only a single model, the config file can specify multiple models.
141
+
142
+ The server supports routing requests to multiple models based on the `model` parameter in the request which matches against the `model_alias` in the config file.
143
+
144
+ At the moment only a single model is loaded into memory at a time; the server will automatically load and unload models as needed.
145
+
146
+ ```json
147
+ {
148
+ "host": "0.0.0.0",
149
+ "port": 8080,
150
+ "models": [
151
+ {
152
+ "model": "models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf",
153
+ "model_alias": "gpt-3.5-turbo",
154
+ "chat_format": "chatml",
155
+ "n_gpu_layers": -1,
156
+ "offload_kqv": true,
157
+ "n_threads": 12,
158
+ "n_batch": 512,
159
+ "n_ctx": 2048
160
+ },
161
+ {
162
+ "model": "models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf",
163
+ "model_alias": "gpt-4",
164
+ "chat_format": "chatml",
165
+ "n_gpu_layers": -1,
166
+ "offload_kqv": true,
167
+ "n_threads": 12,
168
+ "n_batch": 512,
169
+ "n_ctx": 2048
170
+ },
171
+ {
172
+ "model": "models/ggml_llava-v1.5-7b/ggml-model-q4_k.gguf",
173
+ "model_alias": "gpt-4-vision-preview",
174
+ "chat_format": "llava-1-5",
175
+ "clip_model_path": "models/ggml_llava-v1.5-7b/mmproj-model-f16.gguf",
176
+ "n_gpu_layers": -1,
177
+ "offload_kqv": true,
178
+ "n_threads": 12,
179
+ "n_batch": 512,
180
+ "n_ctx": 2048
181
+ },
182
+ {
183
+ "model": "models/mistral-7b-v0.1-GGUF/ggml-model-Q4_K.gguf",
184
+ "model_alias": "text-davinci-003",
185
+ "n_gpu_layers": -1,
186
+ "offload_kqv": true,
187
+ "n_threads": 12,
188
+ "n_batch": 512,
189
+ "n_ctx": 2048
190
+ },
191
+ {
192
+ "model": "models/replit-code-v1_5-3b-GGUF/replit-code-v1_5-3b.Q4_0.gguf",
193
+ "model_alias": "copilot-codex",
194
+ "n_gpu_layers": -1,
195
+ "offload_kqv": true,
196
+ "n_threads": 12,
197
+ "n_batch": 1024,
198
+ "n_ctx": 9216
199
+ }
200
+ ]
201
+ }
202
+ ```
203
+
204
+ The config file format is defined by the [`ConfigFileSettings`](#llama_cpp.server.settings.ConfigFileSettings) class.
205
+
206
+ ## Server Options Reference
207
+
208
+ ::: llama_cpp.server.settings.ConfigFileSettings
209
+ options:
210
+ show_if_no_docstring: true
211
+
212
+ ::: llama_cpp.server.settings.ServerSettings
213
+ options:
214
+ show_if_no_docstring: true
215
+
216
+ ::: llama_cpp.server.settings.ModelSettings
217
+ options:
218
+ show_if_no_docstring: true
@@ -1,4 +1,4 @@
1
1
  from .llama_cpp import *
2
2
  from .llama import *
3
3
 
4
- __version__ = "0.2.24"
4
+ __version__ = "0.2.25"
@@ -1551,11 +1551,13 @@ class Llama:
1551
1551
  "utf-8", errors="ignore"
1552
1552
  )
1553
1553
  text_offset = len(prompt) + len(
1554
- self.detokenize(completion_tokens[:returned_tokens])
1554
+ self.detokenize(completion_tokens[:returned_tokens]).decode(
1555
+ "utf-8", errors="ignore"
1556
+ )
1555
1557
  )
1556
1558
  token_offset = len(prompt_tokens) + returned_tokens
1557
1559
  logits = self._scores[token_offset - 1, :]
1558
- current_logprobs = Llama.logits_to_logprobs(logits)
1560
+ current_logprobs = Llama.logits_to_logprobs(logits).tolist()
1559
1561
  sorted_logprobs = list(
1560
1562
  sorted(
1561
1563
  zip(current_logprobs, range(len(current_logprobs))),
@@ -1674,7 +1676,7 @@ class Llama:
1674
1676
  )
1675
1677
  token_offset = len(prompt_tokens) + returned_tokens - 1
1676
1678
  logits = self._scores[token_offset, :]
1677
- current_logprobs = Llama.logits_to_logprobs(logits)
1679
+ current_logprobs = Llama.logits_to_logprobs(logits).tolist()
1678
1680
  sorted_logprobs = list(
1679
1681
  sorted(
1680
1682
  zip(current_logprobs, range(len(current_logprobs))),
@@ -1789,13 +1791,19 @@ class Llama:
1789
1791
  ]
1790
1792
  all_logprobs = Llama.logits_to_logprobs(self._scores)[token_offset:]
1791
1793
  # TODO: may be able to change this loop to use np.take_along_dim
1792
- for token, token_str, logprobs_token in zip(
1793
- all_tokens, all_token_strs, all_logprobs
1794
+ for idx, (token, token_str, logprobs_token) in enumerate(
1795
+ zip(all_tokens, all_token_strs, all_logprobs)
1794
1796
  ):
1795
1797
  if token == self.token_bos():
1796
1798
  continue
1797
- text_offsets.append(text_offset)
1798
- text_offset += len(token_str)
1799
+ text_offsets.append(
1800
+ text_offset
1801
+ + len(
1802
+ self.detokenize(all_tokens[:idx]).decode(
1803
+ "utf-8", errors="ignore"
1804
+ )
1805
+ )
1806
+ )
1799
1807
  tokens.append(token_str)
1800
1808
  sorted_logprobs = list(
1801
1809
  sorted(
@@ -1909,7 +1917,7 @@ class Llama:
1909
1917
  completion_or_chunks = self._create_completion(
1910
1918
  prompt=prompt,
1911
1919
  suffix=suffix,
1912
- max_tokens=max_tokens,
1920
+ max_tokens=-1 if max_tokens is None else max_tokens,
1913
1921
  temperature=temperature,
1914
1922
  top_p=top_p,
1915
1923
  min_p=min_p,
@@ -1943,7 +1951,7 @@ class Llama:
1943
1951
  self,
1944
1952
  prompt: str,
1945
1953
  suffix: Optional[str] = None,
1946
- max_tokens: int = 128,
1954
+ max_tokens: Optional[int] = 16,
1947
1955
  temperature: float = 0.8,
1948
1956
  top_p: float = 0.95,
1949
1957
  min_p: float = 0.05,
@@ -9,6 +9,7 @@ from ctypes import (
9
9
  c_int32,
10
10
  c_uint8,
11
11
  c_uint32,
12
+ c_int64,
12
13
  c_size_t,
13
14
  c_float,
14
15
  c_double,
@@ -16,6 +17,7 @@ from ctypes import (
16
17
  POINTER,
17
18
  _Pointer, # type: ignore
18
19
  Structure,
20
+ Union as CtypesUnion,
19
21
  Array,
20
22
  )
21
23
  import pathlib
@@ -252,8 +254,8 @@ class llama_token_data_array(Structure):
252
254
 
253
255
  llama_token_data_array_p = POINTER(llama_token_data_array)
254
256
 
255
- # typedef void (*llama_progress_callback)(float progress, void *ctx);
256
- llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
257
+ # typedef bool (*llama_progress_callback)(float progress, void *ctx);
258
+ llama_progress_callback = ctypes.CFUNCTYPE(c_bool, c_float, c_void_p)
257
259
 
258
260
 
259
261
  # // Input data for llama_decode
@@ -317,12 +319,9 @@ class llama_batch(Structure):
317
319
  # LLAMA_KV_OVERRIDE_FLOAT,
318
320
  # LLAMA_KV_OVERRIDE_BOOL,
319
321
  # };
320
- class llama_model_kv_override_type(Structure):
321
- _fields_ = [
322
- ("LLAMA_KV_OVERRIDE_INT", c_int),
323
- ("LLAMA_KV_OVERRIDE_FLOAT", c_int),
324
- ("LLAMA_KV_OVERRIDE_BOOL", c_int),
325
- ]
322
+ LLAMA_KV_OVERRIDE_INT = 0
323
+ LLAMA_KV_OVERRIDE_FLOAT = 1
324
+ LLAMA_KV_OVERRIDE_BOOL = 2
326
325
 
327
326
  # struct llama_model_kv_override {
328
327
  # char key[128];
@@ -333,21 +332,28 @@ class llama_model_kv_override_type(Structure):
333
332
  # bool bool_value;
334
333
  # };
335
334
  # };
336
- class llama_model_kv_override(Structure):
335
+ class llama_model_kv_override_value(CtypesUnion):
337
336
  _fields_ = [
338
- ("key", ctypes.c_char * 128),
339
- ("tag", llama_model_kv_override_type),
340
- ("int_value", ctypes.c_int64),
337
+ ("int_value", c_int64),
341
338
  ("float_value", c_double),
342
339
  ("bool_value", c_bool),
343
340
  ]
344
341
 
342
+ class llama_model_kv_override(Structure):
343
+ _fields_ = [
344
+ ("key", ctypes.c_char * 128),
345
+ ("tag", c_int),
346
+ ("value", llama_model_kv_override_value),
347
+ ]
348
+
345
349
  # struct llama_model_params {
346
350
  # int32_t n_gpu_layers; // number of layers to store in VRAM
347
351
  # int32_t main_gpu; // the GPU that is used for scratch and small tensors
348
352
  # const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
349
353
 
350
- # // called with a progress value between 0 and 1, pass NULL to disable
354
+ # // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
355
+ # // If the provided progress_callback returns true, model loading continues.
356
+ # // If it returns false, model loading is immediately aborted.
351
357
  # llama_progress_callback progress_callback;
352
358
  # // context pointer passed to the progress callback
353
359
  # void * progress_callback_user_data;
@@ -367,7 +373,7 @@ class llama_model_params(Structure):
367
373
  n_gpu_layers (int): number of layers to store in VRAM
368
374
  main_gpu (int): the GPU that is used for scratch and small tensors
369
375
  tensor_split (ctypes.Array[ctypes.c_float]): how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
370
- progress_callback (llama_progress_callback): called with a progress value between 0 and 1, pass NULL to disable
376
+ progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
371
377
  progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
372
378
  kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
373
379
  vocab_only (bool): only load the vocabulary, no weights
@@ -733,8 +739,14 @@ def llama_n_ctx(ctx: llama_context_p) -> int:
733
739
 
734
740
 
735
741
  _lib.llama_n_ctx.argtypes = [llama_context_p]
736
- _lib.llama_n_ctx.restype = c_int
742
+ _lib.llama_n_ctx.restype = c_uint32
737
743
 
744
+ # LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
745
+ def llama_n_batch(ctx: llama_context_p) -> int:
746
+ return _lib.llama_n_batch(ctx)
747
+
748
+ _lib.llama_n_batch.argtypes = [llama_context_p]
749
+ _lib.llama_n_batch.restype = c_uint32
738
750
 
739
751
  # LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
740
752
  def llama_vocab_type(model: llama_model_p) -> int:
@@ -1041,6 +1053,9 @@ class llama_kv_cache_view(Structure):
1041
1053
  ]
1042
1054
 
1043
1055
 
1056
+ llama_kv_cache_view_p = POINTER(llama_kv_cache_view)
1057
+
1058
+
1044
1059
  # // Create an empty KV cache view. (use only for debugging purposes)
1045
1060
  # LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq);
1046
1061
  def llama_kv_cache_view_init(
@@ -1056,23 +1071,23 @@ _lib.llama_kv_cache_view_init.restype = llama_kv_cache_view
1056
1071
 
1057
1072
  # // Free a KV cache view. (use only for debugging purposes)
1058
1073
  # LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
1059
- def llama_kv_cache_view_free(view: llama_kv_cache_view):
1074
+ def llama_kv_cache_view_free(view: "ctypes.pointer[llama_kv_cache_view]"): # type: ignore
1060
1075
  """Free a KV cache view. (use only for debugging purposes)"""
1061
1076
  return _lib.llama_kv_cache_view_free(view)
1062
1077
 
1063
1078
 
1064
- _lib.llama_kv_cache_view_free.argtypes = [llama_kv_cache_view]
1079
+ _lib.llama_kv_cache_view_free.argtypes = [llama_kv_cache_view_p]
1065
1080
  _lib.llama_kv_cache_view_free.restype = None
1066
1081
 
1067
1082
 
1068
1083
  # // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
1069
1084
  # LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
1070
- def llama_kv_cache_view_update(ctx: llama_context_p, view: llama_kv_cache_view):
1085
+ def llama_kv_cache_view_update(ctx: llama_context_p, view: "ctypes.pointer[llama_kv_cache_view]"): # type: ignore
1071
1086
  """Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)"""
1072
1087
  return _lib.llama_kv_cache_view_update(ctx, view)
1073
1088
 
1074
1089
 
1075
- _lib.llama_kv_cache_view_update.argtypes = [llama_context_p, llama_kv_cache_view]
1090
+ _lib.llama_kv_cache_view_update.argtypes = [llama_context_p, llama_kv_cache_view_p]
1076
1091
  _lib.llama_kv_cache_view_update.restype = None
1077
1092
 
1078
1093
 
@@ -0,0 +1,88 @@
1
+ """Example FastAPI server for llama.cpp.
2
+
3
+ To run this example:
4
+
5
+ ```bash
6
+ pip install fastapi uvicorn sse-starlette pydantic-settings
7
+ export MODEL=../models/7B/...
8
+ ```
9
+
10
+ Then run:
11
+ ```
12
+ uvicorn llama_cpp.server.app:create_app --reload
13
+ ```
14
+
15
+ or
16
+
17
+ ```
18
+ python3 -m llama_cpp.server
19
+ ```
20
+
21
+ Then visit http://localhost:8000/docs to see the interactive API docs.
22
+
23
+ """
24
+ from __future__ import annotations
25
+
26
+ import os
27
+ import sys
28
+ import argparse
29
+
30
+ import uvicorn
31
+
32
+ from llama_cpp.server.app import create_app
33
+ from llama_cpp.server.settings import (
34
+ Settings,
35
+ ServerSettings,
36
+ ModelSettings,
37
+ ConfigFileSettings,
38
+ )
39
+ from llama_cpp.server.cli import add_args_from_model, parse_model_from_args
40
+
41
+
42
+ def main():
43
+ description = "🦙 Llama.cpp python server. Host your own LLMs!🚀"
44
+ parser = argparse.ArgumentParser(description=description)
45
+
46
+ add_args_from_model(parser, Settings)
47
+ parser.add_argument(
48
+ "--config_file",
49
+ type=str,
50
+ help="Path to a config file to load.",
51
+ )
52
+ server_settings: ServerSettings | None = None
53
+ model_settings: list[ModelSettings] = []
54
+ args = parser.parse_args()
55
+ try:
56
+ # Load server settings from config_file if provided
57
+ config_file = os.environ.get("CONFIG_FILE", args.config_file)
58
+ if config_file:
59
+ if not os.path.exists(config_file):
60
+ raise ValueError(f"Config file {config_file} not found!")
61
+ with open(config_file, "rb") as f:
62
+ config_file_settings = ConfigFileSettings.model_validate_json(f.read())
63
+ server_settings = ServerSettings.model_validate(config_file_settings)
64
+ model_settings = config_file_settings.models
65
+ else:
66
+ server_settings = parse_model_from_args(ServerSettings, args)
67
+ model_settings = [parse_model_from_args(ModelSettings, args)]
68
+ except Exception as e:
69
+ print(e, file=sys.stderr)
70
+ parser.print_help()
71
+ sys.exit(1)
72
+ assert server_settings is not None
73
+ assert model_settings is not None
74
+ app = create_app(
75
+ server_settings=server_settings,
76
+ model_settings=model_settings,
77
+ )
78
+ uvicorn.run(
79
+ app,
80
+ host=os.getenv("HOST", server_settings.host),
81
+ port=int(os.getenv("PORT", server_settings.port)),
82
+ ssl_keyfile=server_settings.ssl_keyfile,
83
+ ssl_certfile=server_settings.ssl_certfile,
84
+ )
85
+
86
+
87
+ if __name__ == "__main__":
88
+ main()