llama-cpp-python 0.1.54__tar.gz → 0.1.56__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. llama_cpp_python-0.1.56/CHANGELOG.md +20 -0
  2. llama_cpp_python-0.1.56/Makefile +49 -0
  3. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/PKG-INFO +12 -1
  4. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/README.md +11 -0
  5. llama_cpp_python-0.1.56/docker/Dockerfile +51 -0
  6. llama_cpp_python-0.1.56/docker/README.md +46 -0
  7. llama_cpp_python-0.1.56/docker/hug_model.py +116 -0
  8. llama_cpp_python-0.1.56/docker/start_server.sh +11 -0
  9. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/low_level_api/low_level_api_chat_cpp.py +11 -8
  10. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp/llama.py +168 -56
  11. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp/server/app.py +88 -58
  12. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp_python.egg-info/PKG-INFO +12 -1
  13. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp_python.egg-info/SOURCES.txt +9 -2
  14. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp_python.egg-info/requires.txt +1 -0
  15. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/poetry.lock +379 -156
  16. llama_cpp_python-0.1.56/poetry.toml +3 -0
  17. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/pyproject.toml +12 -5
  18. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/setup.py +2 -4
  19. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.github/workflows/build.yml +5 -5
  20. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/CMakeLists.txt +29 -25
  21. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/Makefile +11 -1
  22. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/README.md +19 -0
  23. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/chat-persistent.sh +2 -2
  24. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/main/README.md +1 -1
  25. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/main/main.cpp +14 -4
  26. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/ggml-cuda.cu +71 -39
  27. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/ggml-opencl.cpp +1 -1
  28. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.dockerignore +0 -0
  29. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  30. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  31. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.github/dependabot.yml +0 -0
  32. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.github/workflows/build-and-release.yaml +0 -0
  33. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.github/workflows/build-docker.yaml +0 -0
  34. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.github/workflows/publish-to-test.yaml +0 -0
  35. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.github/workflows/publish.yaml +0 -0
  36. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.github/workflows/test.yaml +0 -0
  37. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.gitignore +0 -0
  38. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/.gitmodules +0 -0
  39. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/CMakeLists.txt +0 -0
  40. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/LICENSE.md +0 -0
  41. /llama_cpp_python-0.1.54/Dockerfile.cuda → /llama_cpp_python-0.1.56/docker/Dockerfile.cuda_simple +0 -0
  42. /llama_cpp_python-0.1.54/Dockerfile → /llama_cpp_python-0.1.56/docker/Dockerfile.openblas_simple +0 -0
  43. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/docs/index.md +0 -0
  44. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/high_level_api/fastapi_server.py +0 -0
  45. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/high_level_api/high_level_api_embedding.py +0 -0
  46. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/high_level_api/high_level_api_inference.py +0 -0
  47. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/high_level_api/high_level_api_streaming.py +0 -0
  48. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/high_level_api/langchain_custom_llm.py +0 -0
  49. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/low_level_api/Chat.py +0 -0
  50. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/low_level_api/Miku.py +0 -0
  51. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/low_level_api/ReasonAct.py +0 -0
  52. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/low_level_api/common.py +0 -0
  53. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/low_level_api/low_level_api_llama_cpp.py +0 -0
  54. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/low_level_api/quantize.py +0 -0
  55. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/low_level_api/util.py +0 -0
  56. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/notebooks/Clients.ipynb +0 -0
  57. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/notebooks/Guidance.ipynb +0 -0
  58. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/examples/notebooks/PerformanceTuning.ipynb +0 -0
  59. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp/__init__.py +0 -0
  60. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp/llama_cpp.py +0 -0
  61. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp/llama_types.py +0 -0
  62. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp/server/__init__.py +0 -0
  63. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp/server/__main__.py +0 -0
  64. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp_python.egg-info/dependency_links.txt +0 -0
  65. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/llama_cpp_python.egg-info/top_level.txt +0 -0
  66. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/mkdocs.yml +0 -0
  67. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/setup.cfg +0 -0
  68. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/tests/test_llama.py +0 -0
  69. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.clang-tidy +0 -0
  70. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
  71. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
  72. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.devops/tools.sh +0 -0
  73. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.dockerignore +0 -0
  74. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.ecrc +0 -0
  75. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.editorconfig +0 -0
  76. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/custom.md +0 -0
  77. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.github/workflows/docker.yml +0 -0
  78. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.github/workflows/editorconfig.yml +0 -0
  79. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.github/workflows/tidy-post.yml +0 -0
  80. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.github/workflows/tidy-review.yml +0 -0
  81. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/.gitignore +0 -0
  82. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/BLIS.md +0 -0
  83. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/LICENSE +0 -0
  84. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/Package.swift +0 -0
  85. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/SHA256SUMS +0 -0
  86. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/build.zig +0 -0
  87. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/convert-lora-to-ggml.py +0 -0
  88. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/convert-pth-to-ggml.py +0 -0
  89. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/convert.py +0 -0
  90. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/CMakeLists.txt +0 -0
  91. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/Miku.sh +0 -0
  92. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/alpaca.sh +0 -0
  93. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -0
  94. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -0
  95. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/benchmark/CMakeLists.txt +0 -0
  96. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -0
  97. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/chat-13B.bat +0 -0
  98. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/chat-13B.sh +0 -0
  99. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/chat.sh +0 -0
  100. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/common.cpp +0 -0
  101. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/common.h +0 -0
  102. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/embedding/CMakeLists.txt +0 -0
  103. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/embedding/README.md +0 -0
  104. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/embedding/embedding.cpp +0 -0
  105. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/gpt4all.sh +0 -0
  106. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/jeopardy/README.md +0 -0
  107. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/jeopardy/graph.py +0 -0
  108. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/jeopardy/jeopardy.sh +0 -0
  109. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/jeopardy/qasheet.csv +0 -0
  110. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/jeopardy/questions.txt +0 -0
  111. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/main/CMakeLists.txt +0 -0
  112. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/perplexity/CMakeLists.txt +0 -0
  113. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/perplexity/README.md +0 -0
  114. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/perplexity/perplexity.cpp +0 -0
  115. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/quantize/CMakeLists.txt +0 -0
  116. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/quantize/README.md +0 -0
  117. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/quantize/quantize.cpp +0 -0
  118. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -0
  119. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/quantize-stats/quantize-stats.cpp +0 -0
  120. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/reason-act.sh +0 -0
  121. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -0
  122. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -0
  123. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/server/CMakeLists.txt +0 -0
  124. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/server/README.md +0 -0
  125. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/server/httplib.h +0 -0
  126. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/server/json.hpp +0 -0
  127. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/examples/server/server.cpp +0 -0
  128. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/flake.lock +0 -0
  129. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/flake.nix +0 -0
  130. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/ggml-cuda.h +0 -0
  131. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/ggml-opencl.h +0 -0
  132. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/ggml.c +0 -0
  133. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/ggml.h +0 -0
  134. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/llama-util.h +0 -0
  135. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/llama.cpp +0 -0
  136. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/llama.h +0 -0
  137. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/media/llama-leader.jpeg +0 -0
  138. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/media/llama0-banner.png +0 -0
  139. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/media/llama0-logo.png +0 -0
  140. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/media/llama1-banner.png +0 -0
  141. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/media/llama1-logo.png +0 -0
  142. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/models/ggml-vocab.bin +0 -0
  143. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/pocs/CMakeLists.txt +0 -0
  144. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/pocs/vdot/CMakeLists.txt +0 -0
  145. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/pocs/vdot/q8dot.cpp +0 -0
  146. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/pocs/vdot/vdot.cpp +0 -0
  147. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
  148. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
  149. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -0
  150. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -0
  151. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/prompts/chat.txt +0 -0
  152. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/prompts/dan-modified.txt +0 -0
  153. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/prompts/dan.txt +0 -0
  154. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/prompts/reason-act.txt +0 -0
  155. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/requirements.txt +0 -0
  156. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/scripts/build-info.cmake +0 -0
  157. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/scripts/build-info.h.in +0 -0
  158. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/scripts/build-info.sh +0 -0
  159. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/scripts/perf-run-all.sh +0 -0
  160. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/scripts/ppl-run-all.sh +0 -0
  161. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/scripts/sync-ggml.sh +0 -0
  162. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/scripts/verify-checksum-models.py +0 -0
  163. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/spm-headers/llama.h +0 -0
  164. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/tests/CMakeLists.txt +0 -0
  165. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/tests/test-double-float.c +0 -0
  166. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/tests/test-grad0.c +0 -0
  167. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/tests/test-opt.c +0 -0
  168. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/tests/test-quantize-fns.cpp +0 -0
  169. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/tests/test-quantize-perf.cpp +0 -0
  170. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/tests/test-sampling.cpp +0 -0
  171. {llama_cpp_python-0.1.54 → llama_cpp_python-0.1.56}/vendor/llama.cpp/tests/test-tokenizer-0.cpp +0 -0
@@ -0,0 +1,20 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [v0.1.56]
11
+
12
+ ### Added
13
+
14
+ - Added first version of the changelog
15
+ - Server: Use async routes
16
+ - Use numpy for internal buffers to reduce memory usage and improve performance.
17
+
18
+ ### Fixed
19
+
20
+ - Performance bug in stop sequence check slowing down streaming.
@@ -0,0 +1,49 @@
1
# Developer convenience targets for llama-cpp-python.
# Every target is a command alias, not a file, so all of them are listed
# in .PHONY at the bottom of this file.

# Install Python dependencies and fetch the vendored llama.cpp submodule.
update:
	poetry install
	git submodule update --init --recursive

# Pull the latest upstream llama.cpp into the vendored submodule.
update.vendor:
	cd vendor/llama.cpp && git pull origin master

# Default (CPU-only) development build.
build:
	python3 setup.py develop

# Development builds with a specific acceleration backend enabled.
build.cuda:
	CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 python3 setup.py develop

build.opencl:
	CMAKE_ARGS="-DLLAMA_CLBLAST=on" FORCE_CMAKE=1 python3 setup.py develop

build.openblas:
	CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python3 setup.py develop

build.blis:
	CMAKE_ARGS="-DLLAMA_OPENBLAS=on -DLLAMA_OPENBLAS_VENDOR=blis" FORCE_CMAKE=1 python3 setup.py develop

# Build the source distribution into dist/.
build.sdist:
	python3 setup.py sdist

# Upload everything in dist/ to PyPI.
deploy.pypi:
	python3 -m twine upload dist/*

# Build the documentation site and publish it to GitHub Pages.
deploy.gh-docs:
	mkdocs build
	mkdocs gh-deploy

# Remove build artifacts. The leading '-' tells make to keep going when a
# step fails (for example when a file to delete does not exist).
clean:
	- cd vendor/llama.cpp && make clean
	- cd vendor/llama.cpp && rm libllama.so
	- rm -rf _skbuild
	- rm llama_cpp/libllama.so

.PHONY: \
	update \
	update.vendor \
	build \
	build.cuda \
	build.opencl \
	build.openblas \
	build.blis \
	build.sdist \
	deploy.pypi \
	deploy.gh-docs \
	clean
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama_cpp_python
3
- Version: 0.1.54
3
+ Version: 0.1.56
4
4
  Summary: A Python wrapper for llama.cpp
5
5
  Author: Andrei Betlen
6
6
  Author-email: abetlen@gmail.com
@@ -173,6 +173,17 @@ To get started, clone the repository and install the package in development mode
173
173
 
174
174
  ```bash
175
175
  git clone --recurse-submodules git@github.com:abetlen/llama-cpp-python.git
176
+
177
+ # Install with pip
178
+ pip install -e .
179
+
180
+ # if you want to use the fastapi / openapi server
181
+ pip install -e .[server]
182
+
183
+ # If you're a poetry user, installing will also include a virtual environment
184
+ poetry install --all-extras
185
+ . .venv/bin/activate
186
+
176
187
  # Will need to be re-run any time vendor/llama.cpp is updated
177
188
  python3 setup.py develop
178
189
  ```
@@ -155,6 +155,17 @@ To get started, clone the repository and install the package in development mode
155
155
 
156
156
  ```bash
157
157
  git clone --recurse-submodules git@github.com:abetlen/llama-cpp-python.git
158
+
159
+ # Install with pip
160
+ pip install -e .
161
+
162
+ # if you want to use the fastapi / openapi server
163
+ pip install -e .[server]
164
+
165
+ # If you're a poetry user, installing will also include a virtual environment
166
+ poetry install --all-extras
167
+ . .venv/bin/activate
168
+
158
169
  # Will need to be re-run any time vendor/llama.cpp is updated
159
170
  python3 setup.py develop
160
171
  ```
@@ -0,0 +1,51 @@
1
# Combined OpenBLAS / CuBLAS image for the llama-cpp-python server.
# The base image selects the backend: the default slim Python image builds
# with OpenBLAS, any other image (e.g. an nvidia/cuda devel image) builds
# with CuBLAS.

# Define the image argument and provide a default value
ARG IMAGE=python:3-slim-bullseye

# Use the image as specified
FROM ${IMAGE}

# Re-declare the ARG after FROM so it is visible in this build stage
ARG IMAGE

# ARG values exist only while building. start_server.sh inspects $IMAGE when
# the container starts, so persist the value in the image environment.
ENV IMAGE=${IMAGE}

# Update and upgrade the existing packages
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
    python3 \
    python3-pip \
    ninja-build \
    build-essential

RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette

# Perform the conditional installations based on the image
RUN echo "Image: ${IMAGE}" && \
    if [ "${IMAGE}" = "python:3-slim-bullseye" ] ; then \
        echo "OpenBLAS install:" && \
        apt-get install -y --no-install-recommends libopenblas-dev && \
        LLAMA_OPENBLAS=1 pip install llama-cpp-python --verbose; \
    else \
        echo "CuBLAS install:" && \
        LLAMA_CUBLAS=1 pip install llama-cpp-python --verbose; \
    fi

# Clean up apt cache
RUN rm -rf /var/lib/apt/lists/*

# Set a working directory for better clarity
WORKDIR /app

# Copy files to the app directory
RUN echo "Installing model...this can take some time..."
COPY ./model.bin /app/model.bin
COPY ./start_server.sh /app/start_server.sh

# Make the server start script executable
RUN chmod +x /app/start_server.sh

# Set environment variable for the host
ENV HOST=0.0.0.0

# Expose a port for the server
EXPOSE 8000

# Run the server start script
CMD ["/bin/sh", "/app/start_server.sh"]
@@ -0,0 +1,46 @@
1
+ # Dockerfiles for building the llama-cpp-python server
2
+ - `Dockerfile.openblas_simple` - a simple Dockerfile for non-GPU OpenBLAS
3
+ - `Dockerfile.cuda_simple` - a simple Dockerfile for CUDA accelerated CuBLAS
4
+ - `hug_model.py` - a Python utility for interactively choosing and downloading the latest `5_1` quantized models from [huggingface.co/TheBloke](https://huggingface.co/TheBloke)
5
+ - `Dockerfile` - a single OpenBLAS and CuBLAS combined Dockerfile that automatically installs a previously downloaded model `model.bin`
6
+
7
+ # Get model from Hugging Face
8
+ `python3 ./hug_model.py`
9
+
10
+ You should now have a model in the current directory and `model.bin` symlinked to it for the subsequent Docker build and copy step. e.g.
11
+ ```
12
+ docker $ ls -lh *.bin
13
+ -rw-rw-r-- 1 user user 4.8G May 23 18:30 <downloaded-model-file>.q5_1.bin
14
+ lrwxrwxrwx 1 user user 24 May 23 18:30 model.bin -> <downloaded-model-file>.q5_1.bin
15
+ ```
16
+ **Note #1:** Make sure you have enough disk space to download the model. As the model is then copied into the image you will need at least
17
+ **TWICE** as much disk space as the size of the model:
18
+
19
+ | Model | Quantized size |
20
+ |------:|----------------:|
21
+ | 7B | 5 GB |
22
+ | 13B | 10 GB |
23
+ | 30B | 25 GB |
24
+ | 65B | 50 GB |
25
+
26
+ **Note #2:** If you want to pass or tune additional parameters, customise `./start_server.sh` before running `docker build ...`
27
+
28
+ # Install Docker Server
29
+
30
+ **Note #3:** This was tested with Docker running on Linux. If you can get it working on Windows or MacOS, please update this `README.md` with a PR!
31
+
32
+ [Install Docker Engine](https://docs.docker.com/engine/install)
33
+
34
+ # Use OpenBLAS
35
+ Use if you don't have an NVIDIA GPU. Defaults to the `python:3-slim-bullseye` Docker base image and OpenBLAS:
36
+ ## Build:
37
+ `docker build -t openblas .`
38
+ ## Run:
39
+ `docker run --cap-add SYS_RESOURCE -t openblas`
40
+
41
+ # Use CuBLAS
42
+ Requires an NVIDIA GPU with sufficient VRAM (approximately as much as the quantized model size listed in the table above) and Docker NVIDIA support (see [container-toolkit/install-guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html))
43
+ ## Build:
44
+ `docker build --build-arg IMAGE=nvidia/cuda:12.1.1-devel-ubuntu22.04 -t cublas .`
45
+ ## Run:
46
+ `docker run --cap-add SYS_RESOURCE -t cublas`
@@ -0,0 +1,116 @@
1
+ import requests
2
+ import json
3
+ import os
4
+ import struct
5
+
6
def make_request(url, params=None):
    """Send a GET request to ``url`` and return the decoded JSON body.

    A progress line is printed before the request is sent. ``params`` is
    passed through as the query parameters.

    Returns the parsed JSON when the response status is 200. For any other
    status the code is printed and ``None`` is returned.
    """
    print(f"Making request to {url}...")
    reply = requests.get(url, params=params)

    # Guard clause: anything other than HTTP 200 is reported and dropped.
    if reply.status_code != 200:
        print(f"Request failed with status code {reply.status_code}")
        return None

    return json.loads(reply.text)
14
+
15
def check_magic_and_version(filename):
    """Read, print and return the magic number and version of a file.

    The first six bytes of ``filename`` are decoded as a little-endian
    unsigned 32-bit integer (the magic) followed by a little-endian
    unsigned 16-bit integer (the version).

    Returns:
        A ``(magic, version)`` tuple of ints.
    """
    header_size = 6  # 4 bytes of magic + 2 bytes of version
    with open(filename, 'rb') as model_file:
        header = model_file.read(header_size)

    fields = struct.unpack('<I H', header)
    magic, version = fields

    print(f"magic: 0x{magic:08x}, version: 0x{version:04x}, file: {filename}")
    return magic, version
27
+
28
def download_file(url, destination):
    """Stream ``url`` into ``destination`` and point ``model.bin`` at it.

    The body is written in 1 KiB chunks and a dot is printed for every
    10 MB received. After a successful download, a ``model.bin`` symlink in
    the current directory is (re)created so that it targets ``destination``.

    If the server does not answer with HTTP 200, the status code is printed
    and nothing is written or linked.
    """
    print(f"Downloading {url} to {destination}...")
    # The context manager releases the streamed connection on every path.
    with requests.get(url, stream=True) as response:
        if response.status_code != 200:
            print(f"Download failed with status code {response.status_code}")
            return

        with open(destination, 'wb') as f:
            total_downloaded = 0
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
                    total_downloaded += len(chunk)
                    if total_downloaded >= 10485760:  # 10 MB
                        print('.', end='', flush=True)
                        total_downloaded = 0
    print("\nDownload complete.")

    # Replace any previous "model.bin". lexists() is used rather than
    # isfile() because isfile() follows symlinks: a dangling link left from
    # a deleted model would be reported as absent and os.symlink() would
    # then fail with FileExistsError.
    if os.path.lexists("model.bin"):
        os.remove("model.bin")
    os.symlink(destination, "model.bin")
49
+
50
def get_user_choice(model_list):
    """Show a numbered menu of models and return the entry the user picks.

    Args:
        model_list: sequence of ``(model_id, rfilename)`` pairs.

    Returns:
        The chosen ``(model_id, rfilename)`` pair, or ``None`` when the
        input is not a number or is outside ``1..len(model_list)``.
    """
    # Print the enumerated list (1-based, matching what the user types)
    print("\n")
    for number, (model_id, rfilename) in enumerate(model_list, start=1):
        print(f"{number}: Model ID: {model_id}, RFilename: {rfilename}")

    choice = input("Choose a model to download by entering the corresponding number: ")

    # Only the conversion can raise, so keep the try body minimal.
    try:
        index = int(choice) - 1
    except ValueError:
        print("Invalid input. Please enter a number corresponding to a model.")
        return None

    # The explicit bounds check makes an IndexError impossible and also
    # rejects 0 and negative numbers, which would otherwise wrap around.
    if 0 <= index < len(model_list):
        return model_list[index]

    print("Invalid choice.")
    return None
71
+
72
+ import argparse
73
+
74
def main():
    """Interactively pick and download a ``q5_1`` model from TheBloke.

    Lists the Hugging Face models by author "TheBloke" tagged "llama",
    collects every file whose name contains ``q5_1``, asks the user to
    choose one, downloads it, and warns when the version read from the file
    header differs from the ``--version`` argument (default ``0x0003``).
    """
    arg_parser = argparse.ArgumentParser(description='Process the model version.')
    arg_parser.add_argument('-v', '--version', type=int, default=0x0003,
                            help='an integer for the version to be used')
    args = arg_parser.parse_args()

    # Query parameters for the model listing
    query = {
        "author": "TheBloke",  # Filter by author
        "tags": "llama"
    }

    models = make_request('https://huggingface.co/api/models', params=query)
    if models is None:
        return

    # Gather (model_id, rfilename) pairs for every q5_1 file of every model
    model_list = []
    for model in models:
        model_id = model['id']
        model_info = make_request(f'https://huggingface.co/api/models/{model_id}')
        if model_info is None:
            continue

        filenames = (
            sibling.get('rfilename')
            for sibling in model_info.get('siblings', [])
        )
        model_list.extend(
            (model_id, rfilename)
            for rfilename in filenames
            if rfilename and 'q5_1' in rfilename
        )

    model_choice = get_user_choice(model_list)
    if model_choice is None:
        return

    model_id, rfilename = model_choice
    url = f"https://huggingface.co/{model_id}/resolve/main/{rfilename}"
    download_file(url, rfilename)

    _, version = check_magic_and_version(rfilename)
    if version != args.version:
        print(f"Warning: Expected version {args.version}, but found different version in the file.")


if __name__ == '__main__':
    main()
@@ -0,0 +1,11 @@
1
#!/bin/sh
# Container entry point: start the llama_cpp server for /app/model.bin.

# Lift the locked-memory limit so the model can be pinned in RAM (mlock).
# This needs the SYS_RESOURCE capability (docker run --cap-add SYS_RESOURCE).
ulimit -l unlimited

# $IMAGE names the base image the container was built from; the default slim
# Python image means an OpenBLAS (CPU-only) build, anything else is treated
# as a CuBLAS build.
# "exec" replaces this shell with the server process so that signals sent to
# the container (e.g. by "docker stop") reach Python directly.
if [ "$IMAGE" = "python:3-slim-bullseye" ]; then
    exec python3 -B -m llama_cpp.server --model /app/model.bin
else
    # You may have to reduce --n_gpu_layers=1000 to 20 or less if you don't have enough VRAM
    exec python3 -B -m llama_cpp.server --model /app/model.bin --n_gpu_layers=1000
fi
@@ -368,10 +368,10 @@ n_keep = {self.params.n_keep}
368
368
  id = llama_cpp.llama_sample_token_mirostat_v2(self.ctx, candidates_p, llama_cpp.c_float(self.params.mirostat_tau), llama_cpp.c_float(self.params.mirostat_eta), llama_cpp.c_float(mirostat_mu))
369
369
  else:
370
370
  # Temperature sampling
371
- llama_cpp.llama_sample_top_k(self.ctx, candidates_p, top_k)
372
- llama_cpp.llama_sample_tail_free(self.ctx, candidates_p, llama_cpp.c_float(self.params.tfs_z))
373
- llama_cpp.llama_sample_typical(self.ctx, candidates_p, llama_cpp.c_float(self.params.typical_p))
374
- llama_cpp.llama_sample_top_p(self.ctx, candidates_p, llama_cpp.c_float(self.params.top_p))
371
+ llama_cpp.llama_sample_top_k(self.ctx, candidates_p, top_k, min_keep=llama_cpp.c_size_t(1))
372
+ llama_cpp.llama_sample_tail_free(self.ctx, candidates_p, llama_cpp.c_float(self.params.tfs_z), min_keep=llama_cpp.c_size_t(1))
373
+ llama_cpp.llama_sample_typical(self.ctx, candidates_p, llama_cpp.c_float(self.params.typical_p), min_keep=llama_cpp.c_size_t(1))
374
+ llama_cpp.llama_sample_top_p(self.ctx, candidates_p, llama_cpp.c_float(self.params.top_p), min_keep=llama_cpp.c_size_t(1))
375
375
  llama_cpp.llama_sample_temperature(self.ctx, candidates_p, llama_cpp.c_float(self.params.temp))
376
376
  id = llama_cpp.llama_sample_token(self.ctx, candidates_p)
377
377
  # print("`{}`".format(candidates_p.size))
@@ -382,12 +382,15 @@ n_keep = {self.params.n_keep}
382
382
  # replace end of text token with newline token when in interactive mode
383
383
  if (id == llama_cpp.llama_token_eos() and self.params.interactive and not self.params.instruct):
384
384
  id = self.llama_token_newline[0]
385
+ self.embd.append(id)
385
386
  if (self.use_antiprompt()):
386
387
  # tokenize and inject first reverse prompt
387
388
  self.embd_inp += self.first_antiprompt[0]
388
-
389
- # add it to the context
390
- self.embd.append(id)
389
+ for id in self.first_antiprompt[0]:
390
+ self.embd.append(id)
391
+ else:
392
+ # add it to the context
393
+ self.embd.append(id)
391
394
 
392
395
  # echo this to console
393
396
  self.output_echo = True
@@ -493,7 +496,7 @@ n_keep = {self.params.n_keep}
493
496
  # Contains multi-byte UTF8
494
497
  for num, pattern in [(2, 192), (3, 224), (4, 240)]:
495
498
  # Bitwise AND check
496
- if pattern & int.from_bytes(cur_char) == pattern:
499
+ if pattern & int.from_bytes(cur_char, 'little') == pattern:
497
500
  self.multibyte_fix = [cur_char] + ([None] * (num-1))
498
501
 
499
502
  # Stop incomplete bytes from passing