llama-cpp-python 0.1.56__tar.gz → 0.1.59__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.github/ISSUE_TEMPLATE/bug_report.md +21 -5
  2. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.gitignore +3 -0
  3. llama_cpp_python-0.1.59/CHANGELOG.md +43 -0
  4. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/CMakeLists.txt +2 -0
  5. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/Makefile +6 -1
  6. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/PKG-INFO +1 -1
  7. llama_cpp_python-0.1.59/docker/README.md +66 -0
  8. llama_cpp_python-0.1.56/docker/Dockerfile.cuda_simple → llama_cpp_python-0.1.59/docker/cuda_simple/Dockerfile +2 -2
  9. llama_cpp_python-0.1.59/docker/open_llama/build.sh +14 -0
  10. {llama_cpp_python-0.1.56/docker → llama_cpp_python-0.1.59/docker/open_llama}/hug_model.py +34 -11
  11. llama_cpp_python-0.1.59/docker/open_llama/start.sh +28 -0
  12. {llama_cpp_python-0.1.56/docker → llama_cpp_python-0.1.59/docker/open_llama}/start_server.sh +1 -1
  13. llama_cpp_python-0.1.56/docker/Dockerfile.openblas_simple → llama_cpp_python-0.1.59/docker/openblas_simple/Dockerfile +1 -1
  14. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp/llama.py +264 -191
  15. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp/llama_cpp.py +47 -15
  16. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp/server/app.py +27 -0
  17. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp_python.egg-info/PKG-INFO +1 -1
  18. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp_python.egg-info/SOURCES.txt +16 -6
  19. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp_python.egg-info/requires.txt +1 -0
  20. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/poetry.lock +56 -18
  21. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/pyproject.toml +6 -5
  22. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/setup.py +2 -2
  23. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.devops/full.Dockerfile +1 -1
  24. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.devops/main.Dockerfile +1 -1
  25. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.devops/tools.sh +2 -2
  26. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.github/workflows/build.yml +10 -10
  27. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.github/workflows/tidy-post.yml +1 -1
  28. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.gitignore +3 -0
  29. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/CMakeLists.txt +54 -16
  30. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/Makefile +64 -17
  31. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/README.md +116 -12
  32. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/convert-pth-to-ggml.py +3 -1
  33. llama_cpp_python-0.1.59/vendor/llama.cpp/docs/token_generation_performance_tips.md +40 -0
  34. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/CMakeLists.txt +4 -1
  35. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/common.cpp +62 -1
  36. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/common.h +12 -7
  37. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/main/README.md +29 -26
  38. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/main/main.cpp +16 -2
  39. llama_cpp_python-0.1.59/vendor/llama.cpp/examples/metal/CMakeLists.txt +3 -0
  40. llama_cpp_python-0.1.59/vendor/llama.cpp/examples/metal/metal.cpp +102 -0
  41. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/quantize/quantize.cpp +17 -5
  42. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/quantize-stats/quantize-stats.cpp +3 -2
  43. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/server/README.md +5 -2
  44. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/server/server.cpp +82 -13
  45. llama_cpp_python-0.1.59/vendor/llama.cpp/flake.lock +61 -0
  46. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/flake.nix +18 -8
  47. llama_cpp_python-0.1.59/vendor/llama.cpp/ggml-cuda.cu +1907 -0
  48. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/ggml-cuda.h +15 -2
  49. llama_cpp_python-0.1.59/vendor/llama.cpp/ggml-metal.h +63 -0
  50. llama_cpp_python-0.1.59/vendor/llama.cpp/ggml-metal.m +691 -0
  51. llama_cpp_python-0.1.59/vendor/llama.cpp/ggml-metal.metal +505 -0
  52. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/ggml-opencl.cpp +233 -52
  53. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/ggml-opencl.h +2 -0
  54. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/ggml.c +817 -75
  55. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/ggml.h +64 -8
  56. llama_cpp_python-0.1.59/vendor/llama.cpp/k_quants.c +2246 -0
  57. llama_cpp_python-0.1.59/vendor/llama.cpp/k_quants.h +122 -0
  58. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/llama-util.h +16 -0
  59. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/llama.cpp +400 -118
  60. {llama_cpp_python-0.1.56/vendor/llama.cpp/spm-headers → llama_cpp_python-0.1.59/vendor/llama.cpp}/llama.h +33 -3
  61. {llama_cpp_python-0.1.56/vendor/llama.cpp → llama_cpp_python-0.1.59/vendor/llama.cpp/spm-headers}/llama.h +33 -3
  62. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/tests/test-quantize-fns.cpp +6 -1
  63. llama_cpp_python-0.1.56/CHANGELOG.md +0 -20
  64. llama_cpp_python-0.1.56/docker/README.md +0 -46
  65. llama_cpp_python-0.1.56/vendor/llama.cpp/flake.lock +0 -43
  66. llama_cpp_python-0.1.56/vendor/llama.cpp/ggml-cuda.cu +0 -957
  67. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.dockerignore +0 -0
  68. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  69. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.github/dependabot.yml +0 -0
  70. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.github/workflows/build-and-release.yaml +0 -0
  71. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.github/workflows/build-docker.yaml +0 -0
  72. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.github/workflows/publish-to-test.yaml +0 -0
  73. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.github/workflows/publish.yaml +0 -0
  74. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.github/workflows/test.yaml +0 -0
  75. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/.gitmodules +0 -0
  76. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/LICENSE.md +0 -0
  77. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/README.md +0 -0
  78. {llama_cpp_python-0.1.56/docker → llama_cpp_python-0.1.59/docker/open_llama}/Dockerfile +0 -0
  79. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/docs/index.md +0 -0
  80. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/high_level_api/fastapi_server.py +0 -0
  81. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/high_level_api/high_level_api_embedding.py +0 -0
  82. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/high_level_api/high_level_api_inference.py +0 -0
  83. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/high_level_api/high_level_api_streaming.py +0 -0
  84. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/high_level_api/langchain_custom_llm.py +0 -0
  85. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/low_level_api/Chat.py +0 -0
  86. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/low_level_api/Miku.py +0 -0
  87. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/low_level_api/ReasonAct.py +0 -0
  88. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/low_level_api/common.py +0 -0
  89. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/low_level_api/low_level_api_chat_cpp.py +0 -0
  90. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/low_level_api/low_level_api_llama_cpp.py +0 -0
  91. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/low_level_api/quantize.py +0 -0
  92. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/low_level_api/util.py +0 -0
  93. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/notebooks/Clients.ipynb +0 -0
  94. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/notebooks/Guidance.ipynb +0 -0
  95. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/examples/notebooks/PerformanceTuning.ipynb +0 -0
  96. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp/__init__.py +0 -0
  97. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp/llama_types.py +0 -0
  98. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp/server/__init__.py +0 -0
  99. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp/server/__main__.py +0 -0
  100. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp_python.egg-info/dependency_links.txt +0 -0
  101. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/llama_cpp_python.egg-info/top_level.txt +0 -0
  102. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/mkdocs.yml +0 -0
  103. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/poetry.toml +0 -0
  104. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/setup.cfg +0 -0
  105. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/tests/test_llama.py +0 -0
  106. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.clang-tidy +0 -0
  107. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.dockerignore +0 -0
  108. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.ecrc +0 -0
  109. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.editorconfig +0 -0
  110. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/custom.md +0 -0
  111. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.github/workflows/docker.yml +0 -0
  112. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.github/workflows/editorconfig.yml +0 -0
  113. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/.github/workflows/tidy-review.yml +0 -0
  114. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/LICENSE +0 -0
  115. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/Package.swift +0 -0
  116. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/SHA256SUMS +0 -0
  117. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/build.zig +0 -0
  118. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/convert-lora-to-ggml.py +0 -0
  119. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/convert.py +0 -0
  120. {llama_cpp_python-0.1.56/vendor/llama.cpp → llama_cpp_python-0.1.59/vendor/llama.cpp/docs}/BLIS.md +0 -0
  121. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/Miku.sh +0 -0
  122. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/alpaca.sh +0 -0
  123. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -0
  124. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -0
  125. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/benchmark/CMakeLists.txt +0 -0
  126. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -0
  127. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/chat-13B.bat +0 -0
  128. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/chat-13B.sh +0 -0
  129. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/chat-persistent.sh +0 -0
  130. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/chat.sh +0 -0
  131. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/embedding/CMakeLists.txt +0 -0
  132. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/embedding/README.md +0 -0
  133. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/embedding/embedding.cpp +0 -0
  134. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/gpt4all.sh +0 -0
  135. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/jeopardy/README.md +0 -0
  136. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/jeopardy/graph.py +0 -0
  137. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/jeopardy/jeopardy.sh +0 -0
  138. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/jeopardy/qasheet.csv +0 -0
  139. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/jeopardy/questions.txt +0 -0
  140. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/main/CMakeLists.txt +0 -0
  141. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/perplexity/CMakeLists.txt +0 -0
  142. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/perplexity/README.md +0 -0
  143. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/perplexity/perplexity.cpp +0 -0
  144. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/quantize/CMakeLists.txt +0 -0
  145. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/quantize/README.md +0 -0
  146. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -0
  147. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/reason-act.sh +0 -0
  148. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -0
  149. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -0
  150. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/server/CMakeLists.txt +0 -0
  151. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/server/httplib.h +0 -0
  152. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/examples/server/json.hpp +0 -0
  153. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/media/llama-leader.jpeg +0 -0
  154. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/media/llama0-banner.png +0 -0
  155. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/media/llama0-logo.png +0 -0
  156. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/media/llama1-banner.png +0 -0
  157. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/media/llama1-logo.png +0 -0
  158. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/models/ggml-vocab.bin +0 -0
  159. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/pocs/CMakeLists.txt +0 -0
  160. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/pocs/vdot/CMakeLists.txt +0 -0
  161. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/pocs/vdot/q8dot.cpp +0 -0
  162. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/pocs/vdot/vdot.cpp +0 -0
  163. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
  164. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
  165. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -0
  166. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -0
  167. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/prompts/chat.txt +0 -0
  168. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/prompts/dan-modified.txt +0 -0
  169. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/prompts/dan.txt +0 -0
  170. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/prompts/reason-act.txt +0 -0
  171. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/requirements.txt +0 -0
  172. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/scripts/build-info.cmake +0 -0
  173. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/scripts/build-info.h.in +0 -0
  174. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/scripts/build-info.sh +0 -0
  175. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/scripts/perf-run-all.sh +0 -0
  176. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/scripts/ppl-run-all.sh +0 -0
  177. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/scripts/sync-ggml.sh +0 -0
  178. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/scripts/verify-checksum-models.py +0 -0
  179. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/tests/CMakeLists.txt +0 -0
  180. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/tests/test-double-float.c +0 -0
  181. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/tests/test-grad0.c +0 -0
  182. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/tests/test-opt.c +0 -0
  183. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/tests/test-quantize-perf.cpp +0 -0
  184. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/tests/test-sampling.cpp +0 -0
  185. {llama_cpp_python-0.1.56 → llama_cpp_python-0.1.59}/vendor/llama.cpp/tests/test-tokenizer-0.cpp +0 -0
@@ -57,7 +57,17 @@ Please provide detailed steps for reproducing the issue. We are not sitting in f
57
57
  3. step 3
58
58
  4. etc.
59
59
 
60
- **Note: Many issues seem to be regarding performance issues / differences with `llama.cpp`. In these cases we need to confirm that you're comparing against the version of `llama.cpp` that was built with your python package, and which parameters you're passing to the context.**
60
+ **Note: Many issues seem to be regarding functional or performance issues / differences with `llama.cpp`. In these cases we need to confirm that you're comparing against the version of `llama.cpp` that was built with your python package, and which parameters you're passing to the context.**
61
+
62
+ Try the following:
63
+
64
+ 1. `git clone https://github.com/abetlen/llama-cpp-python`
65
+ 2. `cd llama-cpp-python`
66
+ 3. `rm -rf _skbuild/` # delete any old builds
67
+ 4. `python setup.py develop`
68
+ 5. `cd ./vendor/llama.cpp`
69
+ 6. Follow [llama.cpp's instructions](https://github.com/ggerganov/llama.cpp#build) to `cmake` llama.cpp
70
+ 7. Run llama.cpp's `./main` with the same arguments you previously passed to llama-cpp-python and see if you can reproduce the issue. If you can, [log an issue with llama.cpp](https://github.com/ggerganov/llama.cpp/issues)
61
71
 
62
72
  # Failure Logs
63
73
 
@@ -73,8 +83,14 @@ commit 47b0aa6e957b93dbe2c29d53af16fbae2dd628f2
73
83
  llama-cpp-python$ python3 --version
74
84
  Python 3.10.10
75
85
 
76
- llama-cpp-python$ pip list | egrep "uvicorn|fastapi|sse-starlette"
77
- fastapi 0.95.0
78
- sse-starlette 1.3.3
79
- uvicorn 0.21.1
86
+ llama-cpp-python$ pip list | egrep "uvicorn|fastapi|sse-starlette|numpy"
87
+ fastapi 0.95.0
88
+ numpy 1.24.3
89
+ sse-starlette 1.3.3
90
+ uvicorn 0.21.1
91
+
92
+ llama-cpp-python/vendor/llama.cpp$ git log | head -3
93
+ commit 66874d4fbcc7866377246efbcee938e8cc9c7d76
94
+ Author: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
95
+ Date: Thu May 25 20:18:01 2023 -0600
80
96
  ```
@@ -164,3 +164,6 @@ cython_debug/
164
164
  # and can be added to the global gitignore or merged into this file. For a more nuclear
165
165
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166
166
  .idea/
167
+
168
+ # downloaded model .bin files
169
+ docker/open_llama/*.bin
@@ -0,0 +1,43 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [v0.1.59]
11
+
12
+ ### Added
13
+
14
+ - (llama.cpp) k-quants support
15
+ - (server) mirostat sampling parameters to server
16
+
17
+ ### Fixed
18
+
19
+ - Support both `.so` and `.dylib` for `libllama` on MacOS
20
+
21
+ ## [v0.1.58]
22
+
23
+ ### Added
24
+
25
+ - (llama.cpp) Metal Silicon support
26
+
27
+ ## [v0.1.57]
28
+
29
+ ### Added
30
+
31
+ - (llama.cpp) OpenLlama 3B support
32
+
33
+ ## [v0.1.56]
34
+
35
+ ### Added
36
+
37
+ - (misc) Added first version of the changelog
38
+ - (server) Use async routes
39
+ - (python-api) Use numpy for internal buffers to reduce memory usage and improve performance.
40
+
41
+ ### Fixed
42
+
43
+ - (python-api) Performance bug in stop sequence check slowing down streaming.
@@ -27,5 +27,7 @@ else()
27
27
  TARGETS llama
28
28
  LIBRARY DESTINATION llama_cpp
29
29
  RUNTIME DESTINATION llama_cpp
30
+ ARCHIVE DESTINATION llama_cpp
31
+ FRAMEWORK DESTINATION llama_cpp
30
32
  )
31
33
  endif()
@@ -20,6 +20,9 @@ build.openblas:
20
20
  build.blis:
21
21
  CMAKE_ARGS="-DLLAMA_OPENBLAS=on -DLLAMA_OPENBLAS_VENDOR=blis" FORCE_CMAKE=1 python3 setup.py develop
22
22
 
23
+ build.metal:
24
+ CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 python3 setup.py develop
25
+
23
26
  build.sdist:
24
27
  python3 setup.py sdist
25
28
 
@@ -34,7 +37,9 @@ clean:
34
37
  - cd vendor/llama.cpp && make clean
35
38
  - cd vendor/llama.cpp && rm libllama.so
36
39
  - rm -rf _skbuild
37
- - rm llama_cpp/libllama.so
40
+ - rm llama_cpp/*.so
41
+ - rm llama_cpp/*.dylib
42
+ - rm llama_cpp/*.dll
38
43
 
39
44
  .PHONY: \
40
45
  update \
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama_cpp_python
3
- Version: 0.1.56
3
+ Version: 0.1.59
4
4
  Summary: A Python wrapper for llama.cpp
5
5
  Author: Andrei Betlen
6
6
  Author-email: abetlen@gmail.com
@@ -0,0 +1,66 @@
1
+ # Install Docker Server
2
+
3
+ **Note #1:** This was tested with Docker running on Linux. If you can get it working on Windows or MacOS, please update this `README.md` with a PR!
4
+
5
+ [Install Docker Engine](https://docs.docker.com/engine/install)
6
+
7
+ **Note #2:** NVidia GPU CuBLAS support requires an NVidia GPU with sufficient VRAM (approximately as much as the size in the table below) and Docker NVidia support (see [container-toolkit/install-guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html))
8
+
9
+ # Simple Dockerfiles for building the llama-cpp-python server with external model bin files
10
+ ## openblas_simple - a simple Dockerfile for non-GPU OpenBLAS, where the model is located outside the Docker image
11
+ ```
12
+ cd ./openblas_simple
13
+ docker build -t openblas_simple .
14
+ docker run -e USE_MLOCK=0 -e MODEL=/var/model/<model-path> -v <model-root-path>:/var/model -t openblas_simple
15
+ ```
16
+ where `<model-root-path>/<model-path>` is the full path to the model file on the Docker host system.
17
+
18
+ ## cuda_simple - a simple Dockerfile for CUDA accelerated CuBLAS, where the model is located outside the Docker image
19
+ ```
20
+ cd ./cuda_simple
21
+ docker build -t cuda_simple .
22
+ docker run -e USE_MLOCK=0 -e MODEL=/var/model/<model-path> -v <model-root-path>:/var/model -t cuda_simple
23
+ ```
24
+ where `<model-root-path>/<model-path>` is the full path to the model file on the Docker host system.
25
+
26
+ # "Open-Llama-in-a-box"
27
+ ## Download an Apache V2.0 licensed 3B parameter Open Llama model and install into a Docker image that runs an OpenBLAS-enabled llama-cpp-python server
28
+ ```
29
+ $ cd ./open_llama
30
+ ./build.sh
31
+ ./start.sh
32
+ ```
33
+
34
+ # Manually choose your own Llama model from Hugging Face
35
+ `python3 ./hug_model.py -a TheBloke -t llama`
36
+ You should now have a model in the current directory and `model.bin` symlinked to it for the subsequent Docker build and copy step. e.g.
37
+ ```
38
+ docker $ ls -lh *.bin
39
+ -rw-rw-r-- 1 user user 4.8G May 23 18:30 <downloaded-model-file>q5_1.bin
40
+ lrwxrwxrwx 1 user user 24 May 23 18:30 model.bin -> <downloaded-model-file>q5_1.bin
41
+ ```
42
+ **Note #1:** Make sure you have enough disk space to download the model. As the model is then copied into the image you will need at least
43
+ **TWICE** as much disk space as the size of the model:
44
+
45
+ | Model | Quantized size |
46
+ |------:|----------------:|
47
+ | 3B | 3 GB |
48
+ | 7B | 5 GB |
49
+ | 13B | 10 GB |
50
+ | 33B | 25 GB |
51
+ | 65B | 50 GB |
52
+
53
+ **Note #2:** If you want to pass or tune additional parameters, customise `./start_server.sh` before running `docker build ...`
54
+
55
+ ## Use OpenBLAS
56
+ Use this if you don't have an NVidia GPU. Defaults to the `python:3-slim-bullseye` Docker base image and OpenBLAS:
57
+ ### Build:
58
+ `docker build -t openblas .`
59
+ ### Run:
60
+ `docker run --cap-add SYS_RESOURCE -t openblas`
61
+
62
+ ## Use CuBLAS
63
+ ### Build:
64
+ `docker build --build-arg IMAGE=nvidia/cuda:12.1.1-devel-ubuntu22.04 -t cublas .`
65
+ ### Run:
66
+ `docker run --cap-add SYS_RESOURCE -t cublas`
@@ -1,5 +1,5 @@
1
1
  ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
2
- FROM ${CUDA_IMAGE}
2
+ FROM nvidia/cuda:${CUDA_IMAGE}
3
3
 
4
4
  # We need to set the host to 0.0.0.0 to allow outside access
5
5
  ENV HOST 0.0.0.0
@@ -10,7 +10,7 @@ COPY . .
10
10
  RUN apt update && apt install -y python3 python3-pip
11
11
  RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette
12
12
 
13
- RUN LLAMA_CUBLAS=1 python3 setup.py develop
13
+ RUN LLAMA_CUBLAS=1 pip install llama-cpp-python
14
14
 
15
15
  # Run the server
16
16
  CMD python3 -m llama_cpp.server
@@ -0,0 +1,14 @@
1
+ #!/bin/sh
2
+
3
+ MODEL="open_llama_3b"
4
+ # Get open_llama_3b_ggml q5_1 quantization
5
+ python3 ./hug_model.py -a SlyEcho -s ${MODEL} -f "q5_1"
6
+ ls -lh *.bin
7
+
8
+ # Build the default OpenBLAS image
9
+ docker build -t $MODEL .
10
+ docker images | egrep "^(REPOSITORY|$MODEL)"
11
+
12
+ echo
13
+ echo "To start the docker container run:"
14
+ echo "docker run -t -p 8000:8000 $MODEL"
@@ -2,6 +2,7 @@ import requests
2
2
  import json
3
3
  import os
4
4
  import struct
5
+ import argparse
5
6
 
6
7
  def make_request(url, params=None):
7
8
  print(f"Making request to {url}...")
@@ -69,21 +70,30 @@ def get_user_choice(model_list):
69
70
 
70
71
  return None
71
72
 
72
- import argparse
73
-
74
73
  def main():
75
74
  # Create an argument parser
76
- parser = argparse.ArgumentParser(description='Process the model version.')
75
+ parser = argparse.ArgumentParser(description='Process some parameters.')
76
+
77
+ # Arguments
77
78
  parser.add_argument('-v', '--version', type=int, default=0x0003,
78
- help='an integer for the version to be used')
79
+ help='hexadecimal version number of ggml file')
80
+ parser.add_argument('-a', '--author', type=str, default='TheBloke',
81
+ help='HuggingFace author filter')
82
+ parser.add_argument('-t', '--tag', type=str, default='llama',
83
+ help='HuggingFace tag filter')
84
+ parser.add_argument('-s', '--search', type=str, default='',
85
+ help='HuggingFace search filter')
86
+ parser.add_argument('-f', '--filename', type=str, default='q5_1',
87
+ help='HuggingFace model repository filename substring match')
79
88
 
80
89
  # Parse the arguments
81
90
  args = parser.parse_args()
82
91
 
83
92
  # Define the parameters
84
93
  params = {
85
- "author": "TheBloke", # Filter by author
86
- "tags": "llama"
94
+ "author": args.author,
95
+ "tags": args.tag,
96
+ "search": args.search
87
97
  }
88
98
 
89
99
  models = make_request('https://huggingface.co/api/models', params=params)
@@ -100,17 +110,30 @@ def main():
100
110
 
101
111
  for sibling in model_info.get('siblings', []):
102
112
  rfilename = sibling.get('rfilename')
103
- if rfilename and 'q5_1' in rfilename:
113
+ if rfilename and args.filename in rfilename:
104
114
  model_list.append((model_id, rfilename))
105
115
 
106
- model_choice = get_user_choice(model_list)
116
+ # Choose the model
117
+ model_list.sort(key=lambda x: x[0])
118
+ if len(model_list) == 0:
119
+ print("No models found")
120
+ exit(1)
121
+ elif len(model_list) == 1:
122
+ model_choice = model_list[0]
123
+ else:
124
+ model_choice = get_user_choice(model_list)
125
+
107
126
  if model_choice is not None:
108
127
  model_id, rfilename = model_choice
109
128
  url = f"https://huggingface.co/{model_id}/resolve/main/{rfilename}"
110
- download_file(url, rfilename)
111
- _, version = check_magic_and_version(rfilename)
129
+ dest = f"{model_id.replace('/', '_')}_{rfilename}"
130
+ download_file(url, dest)
131
+ _, version = check_magic_and_version(dest)
112
132
  if version != args.version:
113
- print(f"Warning: Expected version {args.version}, but found different version in the file.")
133
+ print(f"Warning: Expected version {args.version}, but found different version in the file.")
134
+ else:
135
+ print("Error - model choice was None")
136
+ exit(2)
114
137
 
115
138
  if __name__ == '__main__':
116
139
  main()
@@ -0,0 +1,28 @@
1
+ #!/bin/sh
2
+
3
+ MODEL="open_llama_3b"
4
+
5
+ # Start Docker container
6
+ docker run --cap-add SYS_RESOURCE -p 8000:8000 -t $MODEL &
7
+ sleep 10
8
+ echo
9
+ docker ps | egrep "(^CONTAINER|$MODEL)"
10
+
11
+ # Test the model works
12
+ echo
13
+ curl -X 'POST' 'http://localhost:8000/v1/completions' -H 'accept: application/json' -H 'Content-Type: application/json' -d '{
14
+ "prompt": "\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n",
15
+ "stop": [
16
+ "\n",
17
+ "###"
18
+ ]
19
+ }' | grep Paris
20
+ if [ $? -eq 0 ]
21
+ then
22
+ echo
23
+ echo "$MODEL is working!!"
24
+ else
25
+ echo
26
+ echo "ERROR: $MODEL not replying."
27
+ exit 1
28
+ fi
@@ -1,6 +1,6 @@
1
1
  #!/bin/sh
2
2
 
3
- # For mmap support
3
+ # For mlock support
4
4
  ulimit -l unlimited
5
5
 
6
6
  if [ "$IMAGE" = "python:3-slim-bullseye" ]; then
@@ -9,7 +9,7 @@ COPY . .
9
9
  RUN apt update && apt install -y libopenblas-dev ninja-build build-essential
10
10
  RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette
11
11
 
12
- RUN LLAMA_OPENBLAS=1 python3 setup.py develop
12
+ RUN LLAMA_OPENBLAS=1 pip install llama_cpp_python --verbose
13
13
 
14
14
  # Run the server
15
15
  CMD python3 -m llama_cpp.server