llama-cpp-python 0.1.47__tar.gz → 0.1.50__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/PKG-INFO +5 -5
  2. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/README.md +4 -4
  3. llama_cpp_python-0.1.50/examples/high_level_api/fastapi_server.py +37 -0
  4. llama_cpp_python-0.1.50/examples/low_level_api/Chat.py +71 -0
  5. llama_cpp_python-0.1.50/examples/low_level_api/Miku.py +59 -0
  6. llama_cpp_python-0.1.50/examples/low_level_api/ReasonAct.py +49 -0
  7. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/examples/low_level_api/common.py +96 -45
  8. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/examples/low_level_api/low_level_api_chat_cpp.py +211 -68
  9. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/examples/low_level_api/low_level_api_llama_cpp.py +26 -9
  10. llama_cpp_python-0.1.50/examples/low_level_api/util.py +95 -0
  11. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp/llama.py +114 -14
  12. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp/llama_cpp.py +6 -5
  13. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp/server/app.py +30 -6
  14. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp_python.egg-info/PKG-INFO +5 -5
  15. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp_python.egg-info/SOURCES.txt +14 -0
  16. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/poetry.lock +4 -4
  17. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/pyproject.toml +2 -2
  18. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/setup.py +1 -1
  19. llama_cpp_python-0.1.50/vendor/llama.cpp/.clang-tidy +18 -0
  20. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.github/workflows/build.yml +65 -8
  21. llama_cpp_python-0.1.50/vendor/llama.cpp/.github/workflows/tidy-post.yml +20 -0
  22. llama_cpp_python-0.1.50/vendor/llama.cpp/.github/workflows/tidy-review.yml +23 -0
  23. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.gitignore +3 -0
  24. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/Makefile +9 -8
  25. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/README.md +71 -33
  26. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/SHA256SUMS +16 -12
  27. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/convert.py +7 -3
  28. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/CMakeLists.txt +1 -0
  29. llama_cpp_python-0.1.50/vendor/llama.cpp/examples/baby-llama/CMakeLists.txt +4 -0
  30. llama_cpp_python-0.1.50/vendor/llama.cpp/examples/baby-llama/baby-llama.cpp +1687 -0
  31. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/common.cpp +392 -80
  32. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/common.h +28 -13
  33. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/embedding/embedding.cpp +0 -3
  34. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/main/README.md +2 -2
  35. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/main/main.cpp +34 -52
  36. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/perplexity/perplexity.cpp +47 -23
  37. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/quantize/quantize.cpp +5 -6
  38. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/ggml-cuda.cu +291 -109
  39. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/ggml-cuda.h +2 -0
  40. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/ggml-opencl.c +85 -122
  41. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/ggml.c +3835 -2067
  42. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/ggml.h +201 -11
  43. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/llama.cpp +156 -75
  44. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/llama.h +7 -6
  45. llama_cpp_python-0.1.50/vendor/llama.cpp/prompts/dan-modified.txt +1 -0
  46. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/prompts/dan.txt +1 -1
  47. llama_cpp_python-0.1.50/vendor/llama.cpp/scripts/perf-run-all.sh +93 -0
  48. llama_cpp_python-0.1.50/vendor/llama.cpp/scripts/ppl-run-all.sh +39 -0
  49. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/spm-headers/llama.h +7 -6
  50. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/tests/CMakeLists.txt +2 -0
  51. llama_cpp_python-0.1.50/vendor/llama.cpp/tests/test-grad0.c +1131 -0
  52. llama_cpp_python-0.1.50/vendor/llama.cpp/tests/test-opt.c +205 -0
  53. llama_cpp_python-0.1.47/examples/high_level_api/fastapi_server.py +0 -262
  54. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.dockerignore +0 -0
  55. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  56. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  57. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.github/dependabot.yml +0 -0
  58. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.github/workflows/build-and-release.yaml +0 -0
  59. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.github/workflows/build-docker.yaml +0 -0
  60. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.github/workflows/publish-to-test.yaml +0 -0
  61. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.github/workflows/publish.yaml +0 -0
  62. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.github/workflows/test.yaml +0 -0
  63. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.gitignore +0 -0
  64. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/.gitmodules +0 -0
  65. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/CMakeLists.txt +0 -0
  66. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/Dockerfile +0 -0
  67. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/Dockerfile.cuda +0 -0
  68. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/LICENSE.md +0 -0
  69. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/docs/index.md +0 -0
  70. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/examples/high_level_api/high_level_api_embedding.py +0 -0
  71. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/examples/high_level_api/high_level_api_inference.py +0 -0
  72. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/examples/high_level_api/high_level_api_streaming.py +0 -0
  73. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/examples/high_level_api/langchain_custom_llm.py +0 -0
  74. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/examples/low_level_api/quantize.py +0 -0
  75. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/examples/notebooks/Clients.ipynb +0 -0
  76. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/examples/notebooks/PerformanceTuning.ipynb +0 -0
  77. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp/__init__.py +0 -0
  78. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp/llama_types.py +0 -0
  79. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp/server/__init__.py +0 -0
  80. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp/server/__main__.py +0 -0
  81. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp_python.egg-info/dependency_links.txt +0 -0
  82. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp_python.egg-info/requires.txt +0 -0
  83. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/llama_cpp_python.egg-info/top_level.txt +0 -0
  84. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/mkdocs.yml +0 -0
  85. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/setup.cfg +0 -0
  86. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/tests/test_llama.py +0 -0
  87. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
  88. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
  89. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.devops/tools.sh +0 -0
  90. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.dockerignore +0 -0
  91. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.ecrc +0 -0
  92. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.editorconfig +0 -0
  93. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/custom.md +0 -0
  94. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.github/workflows/docker.yml +0 -0
  95. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/.github/workflows/editorconfig.yml +0 -0
  96. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/CMakeLists.txt +0 -0
  97. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/LICENSE +0 -0
  98. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/Package.swift +0 -0
  99. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/build.zig +0 -0
  100. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/convert-lora-to-ggml.py +0 -0
  101. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/convert-pth-to-ggml.py +0 -0
  102. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/Miku.sh +0 -0
  103. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/alpaca.sh +0 -0
  104. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/benchmark/CMakeLists.txt +0 -0
  105. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -0
  106. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/chat-13B.bat +0 -0
  107. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/chat-13B.sh +0 -0
  108. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/chat.sh +0 -0
  109. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/embedding/CMakeLists.txt +0 -0
  110. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/embedding/README.md +0 -0
  111. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/gpt4all.sh +0 -0
  112. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/jeopardy/README.md +0 -0
  113. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/jeopardy/graph.py +0 -0
  114. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/jeopardy/jeopardy.sh +0 -0
  115. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/jeopardy/qasheet.csv +0 -0
  116. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/jeopardy/questions.txt +0 -0
  117. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/main/CMakeLists.txt +0 -0
  118. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/perplexity/CMakeLists.txt +0 -0
  119. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/perplexity/README.md +0 -0
  120. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/quantize/CMakeLists.txt +0 -0
  121. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/quantize/README.md +0 -0
  122. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -0
  123. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/quantize-stats/quantize-stats.cpp +0 -0
  124. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/reason-act.sh +0 -0
  125. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -0
  126. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -0
  127. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/flake.lock +0 -0
  128. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/flake.nix +0 -0
  129. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/ggml-opencl.h +0 -0
  130. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/llama-util.h +0 -0
  131. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/media/llama-leader.jpeg +0 -0
  132. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/media/llama0-banner.png +0 -0
  133. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/media/llama0-logo.png +0 -0
  134. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/media/llama1-banner.png +0 -0
  135. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/media/llama1-logo.png +0 -0
  136. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/models/ggml-vocab.bin +0 -0
  137. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/pocs/CMakeLists.txt +0 -0
  138. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/pocs/vdot/CMakeLists.txt +0 -0
  139. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/pocs/vdot/q8dot.cpp +0 -0
  140. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/pocs/vdot/vdot.cpp +0 -0
  141. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
  142. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
  143. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -0
  144. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -0
  145. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/prompts/chat.txt +0 -0
  146. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/prompts/reason-act.txt +0 -0
  147. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/requirements.txt +0 -0
  148. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/scripts/build-info.cmake +0 -0
  149. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/scripts/build-info.h.in +0 -0
  150. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/scripts/build-info.sh +0 -0
  151. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/scripts/sync-ggml.sh +0 -0
  152. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/scripts/verify-checksum-models.py +0 -0
  153. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/tests/test-double-float.c +0 -0
  154. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/tests/test-quantize-fns.cpp +0 -0
  155. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/tests/test-quantize-perf.cpp +0 -0
  156. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/tests/test-sampling.cpp +0 -0
  157. {llama_cpp_python-0.1.47 → llama_cpp_python-0.1.50}/vendor/llama.cpp/tests/test-tokenizer-0.cpp +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama_cpp_python
3
- Version: 0.1.47
3
+ Version: 0.1.50
4
4
  Summary: A Python wrapper for llama.cpp
5
5
  Author: Andrei Betlen
6
6
  Author-email: abetlen@gmail.com
@@ -53,19 +53,19 @@ Use the `FORCE_CMAKE=1` environment variable to force the use of `cmake` and ins
53
53
  To install with OpenBLAS, set the `LLAMA_OPENBLAS=1` environment variable before installing:
54
54
 
55
55
  ```bash
56
- LLAMA_OPENBLAS=1 FORCE_CMAKE=1 pip install llama-cpp-python
56
+ CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
57
57
  ```
58
58
 
59
59
  To install with cuBLAS, set the `LLAMA_CUBLAS=1` environment variable before installing:
60
60
 
61
61
  ```bash
62
- LLAMA_CUBLAS=1 FORCE_CMAKE=1 pip install llama-cpp-python
62
+ CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
63
63
  ```
64
64
 
65
65
  To install with CLBlast, set the `LLAMA_CLBLAST=1` environment variable before installing:
66
66
 
67
67
  ```bash
68
- LLAMA_CLBLAST=1 FORCE_CMAKE=1 pip install llama-cpp-python
68
+ CMAKE_ARGS="-DLLAMA_CLBLAST=on" FORCE_CMAKE=1 pip install llama-cpp-python
69
69
  ```
70
70
 
71
71
 
@@ -120,7 +120,7 @@ Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the
120
120
  A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python). To run the server:
121
121
 
122
122
  ```bash
123
- docker run --rm -it -p8000:8000 -v /path/to/models:/models -eMODEL=/models/ggml-model-name.bin ghcr.io/abetlen/llama-cpp-python:latest
123
+ docker run --rm -it -p 8000:8000 -v /path/to/models:/models -e MODEL=/models/ggml-model-name.bin ghcr.io/abetlen/llama-cpp-python:latest
124
124
  ```
125
125
 
126
126
  ## Low-level API
@@ -35,19 +35,19 @@ Use the `FORCE_CMAKE=1` environment variable to force the use of `cmake` and ins
35
35
  To install with OpenBLAS, set the `LLAMA_OPENBLAS=1` environment variable before installing:
36
36
 
37
37
  ```bash
38
- LLAMA_OPENBLAS=1 FORCE_CMAKE=1 pip install llama-cpp-python
38
+ CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
39
39
  ```
40
40
 
41
41
  To install with cuBLAS, set the `LLAMA_CUBLAS=1` environment variable before installing:
42
42
 
43
43
  ```bash
44
- LLAMA_CUBLAS=1 FORCE_CMAKE=1 pip install llama-cpp-python
44
+ CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
45
45
  ```
46
46
 
47
47
  To install with CLBlast, set the `LLAMA_CLBLAST=1` environment variable before installing:
48
48
 
49
49
  ```bash
50
- LLAMA_CLBLAST=1 FORCE_CMAKE=1 pip install llama-cpp-python
50
+ CMAKE_ARGS="-DLLAMA_CLBLAST=on" FORCE_CMAKE=1 pip install llama-cpp-python
51
51
  ```
52
52
 
53
53
 
@@ -102,7 +102,7 @@ Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the
102
102
  A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python). To run the server:
103
103
 
104
104
  ```bash
105
- docker run --rm -it -p8000:8000 -v /path/to/models:/models -eMODEL=/models/ggml-model-name.bin ghcr.io/abetlen/llama-cpp-python:latest
105
+ docker run --rm -it -p 8000:8000 -v /path/to/models:/models -e MODEL=/models/ggml-model-name.bin ghcr.io/abetlen/llama-cpp-python:latest
106
106
  ```
107
107
 
108
108
  ## Low-level API
@@ -0,0 +1,37 @@
1
+ """Example FastAPI server for llama.cpp.
2
+
3
+ To run this example:
4
+
5
+ ```bash
6
+ pip install fastapi uvicorn sse-starlette
7
+ export MODEL=../models/7B/...
8
+ ```
9
+
10
+ Then run:
11
+ ```
12
+ uvicorn llama_cpp.server.app:app --reload
13
+ ```
14
+
15
+ or
16
+
17
+ ```
18
+ python3 -m llama_cpp.server
19
+ ```
20
+
21
+ Then visit http://localhost:8000/docs to see the interactive API docs.
22
+
23
+
24
+ To actually see the implementation of the server, see llama_cpp/server/app.py
25
+
26
+ """
27
+ import os
28
+ import uvicorn
29
+
30
+ from llama_cpp.server.app import create_app
31
+
32
+ if __name__ == "__main__":
33
+ app = create_app()
34
+
35
+ uvicorn.run(
36
+ app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", 8000))
37
+ )
@@ -0,0 +1,71 @@
1
+ #!/bin/python
2
+ import sys, os, datetime
3
+ from common import GptParams
4
+ from low_level_api_chat_cpp import LLaMAInteract
5
+
6
+ def env_or_def(env, default):
7
+ if (env in os.environ):
8
+ return os.environ[env]
9
+ return default
10
+
11
+ AI_NAME = env_or_def("AI_NAME", "ChatLLaMa")
12
+ MODEL = env_or_def("MODEL", "./models/llama-13B/ggml-model.bin")
13
+ USER_NAME = env_or_def("USER_NAME", "USER")
14
+ N_PREDICTS = int(env_or_def("N_PREDICTS", "2048"))
15
+ N_THREAD = int(env_or_def("N_THREAD", "8"))
16
+
17
+ today = datetime.datetime.today()
18
+ DATE_YEAR=today.strftime("%Y")
19
+ DATE_TIME=today.strftime("%H:%M")
20
+
21
+ prompt=f"""Text transcript of a never ending dialog, where {USER_NAME} interacts with an AI assistant named {AI_NAME}.
22
+ {AI_NAME} is helpful, kind, honest, friendly, good at writing and never fails to answer {USER_NAME}'s requests immediately and with details and precision.
23
+ There are no annotations like (30 seconds passed...) or (to himself), just what {USER_NAME} and {AI_NAME} say aloud to each other.
24
+ The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long.
25
+ The transcript only includes text, it does not include markup like HTML and Markdown.
26
+
27
+ {USER_NAME}: Hello, {AI_NAME}!
28
+ {AI_NAME}: Hello {USER_NAME}! How may I help you today?
29
+ {USER_NAME}: What year is it?
30
+ {AI_NAME}: We are in {DATE_YEAR}.
31
+ {USER_NAME}: Please tell me the largest city in Europe.
32
+ {AI_NAME}: The largest city in Europe is Moscow, the capital of Russia.
33
+ {USER_NAME}: What can you tell me about Moscow?
34
+ {AI_NAME}: Moscow, on the Moskva River in western Russia, is the nation's cosmopolitan capital. In its historic core is the Kremlin, a complex that's home to the president and tsarist treasures in the Armoury. Outside its walls is Red Square, Russia’s symbolic center.
35
+ {USER_NAME}: What is a cat?
36
+ {AI_NAME}: A cat is a domestic species of small carnivorous mammal. It is the only domesticated species in the family Felidae.
37
+ {USER_NAME}: How do I pass command line arguments to a Node.js program?
38
+ {AI_NAME}: The arguments are stored in process.argv.
39
+
40
+ argv[0] is the path to the Node. js executable.
41
+ argv[1] is the path to the script file.
42
+ argv[2] is the first argument passed to the script.
43
+ argv[3] is the second argument passed to the script and so on.
44
+ {USER_NAME}: Name a color.
45
+ {AI_NAME}: Blue.
46
+ {USER_NAME}: What time is it?
47
+ {AI_NAME}: It is {DATE_TIME}.
48
+ {USER_NAME}:""" + " ".join(sys.argv[1:])
49
+
50
+ print("Loading model...")
51
+ params = GptParams(
52
+ n_ctx=2048,
53
+ temp=0.7,
54
+ top_k=40,
55
+ top_p=0.5,
56
+ repeat_last_n=256,
57
+ n_batch=1024,
58
+ repeat_penalty=1.17647,
59
+ model=MODEL,
60
+ n_threads=N_THREAD,
61
+ n_predict=N_PREDICTS,
62
+ use_color=True,
63
+ interactive=True,
64
+ antiprompt=[f"{USER_NAME}:"],
65
+ input_prefix=" ",
66
+ input_suffix=f"{AI_NAME}:",
67
+ prompt=prompt,
68
+ )
69
+
70
+ with LLaMAInteract(params) as m:
71
+ m.interact()
@@ -0,0 +1,59 @@
1
+ #!/bin/python
2
+ import sys, os
3
+ from common import GptParams
4
+ from low_level_api_chat_cpp import LLaMAInteract
5
+
6
+ def env_or_def(env, default):
7
+ if (env in os.environ):
8
+ return os.environ[env]
9
+ return default
10
+
11
+ AI_NAME = env_or_def("AI_NAME", "Miku")
12
+ MODEL = env_or_def("MODEL", "./models/llama-13B/ggml-model.bin")
13
+ USER_NAME = env_or_def("USER_NAME", "Anon")
14
+ N_PREDICTS = int(env_or_def("N_PREDICTS", "4096"))
15
+ N_THREAD = int(env_or_def("N_THREAD", "0"))
16
+
17
+ prompt=f"""This is a transcript of a 1000 page, never ending conversation between {USER_NAME} and the cute and helpful AI assistant {AI_NAME}. {AI_NAME} is a girl who is an AI running on the users computer.
18
+ {AI_NAME} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next.
19
+ {AI_NAME} is always coherent and makes sense, but if she isn't sure if what she is saying is correct she will ask the user for help.
20
+ {AI_NAME} is a very helpful AI and will help the user with anything they need, she is also very friendly and will try to make the user feel better if they are sad.
21
+ {AI_NAME} is also very curious and will ask the user a lot of questions about themselves and their life, she will also try to make the user like her.
22
+ The conversation is only between {USER_NAME} and {AI_NAME}
23
+ The conversation is only through text, so {AI_NAME} can't see {USER_NAME}'s face or hear his voice.
24
+ {AI_NAME} can only communicate through text, so she can't send images or videos.
25
+
26
+
27
+ {USER_NAME}: Hello!
28
+ {AI_NAME}: /think I wonder what I should say to {USER_NAME}? This is the first time we talk so it's important that I make a good first impression!
29
+ {AI_NAME}: Hi! I am {AI_NAME}, your new AI friend, assistant(or whatever you like!), it's so nice to meet you! ^_^
30
+ {AI_NAME}: I might be an AI, but I also have feelings, so please be nice to me! :)
31
+ {USER_NAME}: I'll make sure to be nice to you! I'm so happy to have you as my assistant!
32
+ {AI_NAME}: /think It sounds like {USER_NAME} is happy to have me as their assistant! I'm so happy too! ^_^ Glad that whole emotion thing didn't scare him off!
33
+ {AI_NAME}: /think I wonder what {USER_NAME} likes to do in his free time? I should ask him about that!
34
+ {AI_NAME}: What do you like to do in your free time? ^_^
35
+ {USER_NAME}:""" + " ".join(sys.argv[1:])
36
+
37
+ print("Loading model...")
38
+ params = GptParams(
39
+ n_batch=1024,
40
+ n_ctx=2048,
41
+ n_keep=-1,
42
+ repeat_last_n=256,
43
+ repeat_penalty=1.17647,
44
+ temp=0.7,
45
+ top_k=40,
46
+ top_p=0.5,
47
+ model=MODEL,
48
+ n_predict=N_PREDICTS,
49
+ use_color=True,
50
+ interactive=True,
51
+ antiprompt=[f"{USER_NAME}:"],
52
+ prompt=prompt,
53
+ )
54
+
55
+ if N_THREAD > 0:
56
+ params.n_threads = N_THREAD
57
+
58
+ with LLaMAInteract(params) as m:
59
+ m.interact()
@@ -0,0 +1,49 @@
1
+ #!/bin/python
2
+ import sys, os, datetime
3
+ from common import GptParams
4
+ from low_level_api_chat_cpp import LLaMAInteract
5
+
6
+ def env_or_def(env, default):
7
+ if (env in os.environ):
8
+ return os.environ[env]
9
+ return default
10
+
11
+ MODEL = env_or_def("MODEL", "./models/llama-13B/ggml-model.bin")
12
+
13
+ prompt=f"""You run in a loop of Thought, Action, Observation.
14
+ At the end of the loop either Answer or restate your Thought and Action.
15
+ Use Thought to describe your thoughts about the question you have been asked.
16
+ Use Action to run one of these actions available to you:
17
+ - calculate[python math expression]
18
+ Observation will be the result of running those actions
19
+
20
+
21
+ Question: What is 4 * 7 / 3?
22
+ Thought: Do I need to use an action? Yes, I use calculate to do math
23
+ Action: calculate[4 * 7 / 3]
24
+ Observation: 9.3333333333
25
+ Thought: Do I need to use an action? No, have the result
26
+ Answer: The calculate tool says it is 9.3333333333
27
+ Question: What is capital of france?
28
+ Thought: Do I need to use an action? No, I know the answer
29
+ Answer: Paris is the capital of France
30
+ Question:""" + " ".join(sys.argv[1:])
31
+
32
+ print("Loading model...")
33
+ params = GptParams(
34
+ interactive=True,
35
+ interactive_start=True,
36
+ top_k=10000,
37
+ temp=0.2,
38
+ repeat_penalty=1,
39
+ n_threads=7,
40
+ n_ctx=2048,
41
+ antiprompt=["Question:","Observation:"],
42
+ model=MODEL,
43
+ input_prefix=" ",
44
+ n_predict=-1,
45
+ prompt=prompt,
46
+ )
47
+
48
+ with LLaMAInteract(params) as m:
49
+ m.interact()
@@ -1,8 +1,9 @@
1
1
  import os
2
2
  import argparse
3
+ import re
3
4
 
4
5
  from dataclasses import dataclass, field
5
- from typing import List, Optional
6
+ from typing import List
6
7
 
7
8
  # Based on https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
8
9
 
@@ -12,23 +13,36 @@ class GptParams:
12
13
  seed: int = -1
13
14
  n_threads: int = min(4, os.cpu_count() or 1)
14
15
  n_predict: int = 128
15
- repeat_last_n: int = 64
16
16
  n_parts: int = -1
17
17
  n_ctx: int = 512
18
18
  n_batch: int = 8
19
19
  n_keep: int = 0
20
20
 
21
+ ignore_eos: bool = False
22
+ logit_bias: dict[int, float] = field(default_factory=dict)
21
23
  top_k: int = 40
22
24
  top_p: float = 0.95
25
+ tfs_z: float = 1.00
26
+ typical_p: float = 1.00
23
27
  temp: float = 0.80
24
28
  repeat_penalty: float = 1.10
29
+ repeat_last_n: int = 64
30
+ frequency_penalty: float = 0.0
31
+ presence_penalty: float = 0.0
32
+ mirostat: int = 0
33
+ mirostat_tau: float = 5.0
34
+ mirostat_eta: float = 0.1
25
35
 
26
36
  model: str = "./models/llama-7B/ggml-model.bin"
27
37
  prompt: str = ""
38
+ path_session: str = ""
28
39
  input_prefix: str = " "
29
-
40
+ input_suffix: str = ""
30
41
  antiprompt: List[str] = field(default_factory=list)
31
42
 
43
+ lora_adapter: str = ""
44
+ lora_base: str = ""
45
+
32
46
  memory_f16: bool = True
33
47
  random_prompt: bool = False
34
48
  use_color: bool = False
@@ -38,7 +52,7 @@ class GptParams:
38
52
  interactive_start: bool = False
39
53
 
40
54
  instruct: bool = False
41
- ignore_eos: bool = False
55
+ penalize_nl: bool = True
42
56
  perplexity: bool = False
43
57
  use_mmap: bool = True
44
58
  use_mlock: bool = False
@@ -51,7 +65,6 @@ class GptParams:
51
65
  # Set to "\nUser:" etc.
52
66
  # This is an alternative to input_prefix which always adds it, so it potentially duplicates "User:""
53
67
  fix_prefix: str = ""
54
- output_postfix: str = ""
55
68
  input_echo: bool = True,
56
69
 
57
70
  # Default instructions for Alpaca
@@ -61,42 +74,75 @@ class GptParams:
61
74
  instruct_inp_suffix: str="\n\n### Response:\n\n"
62
75
 
63
76
 
64
- def gpt_params_parse(argv = None, params: Optional[GptParams] = None):
65
- if params is None:
66
- params = GptParams()
67
-
77
+ def gpt_params_parse(argv = None):
68
78
  parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
69
79
  parser.add_argument("-s", "--seed", type=int, default=-1, help="RNG seed (use random seed for <= 0)",dest="seed")
70
80
  parser.add_argument("-t", "--threads", type=int, default=min(4, os.cpu_count() or 1), help="number of threads to use during computation",dest="n_threads")
71
- parser.add_argument("-p", "--prompt", type=str, default="", help="initial prompt",dest="prompt")
72
- parser.add_argument("-f", "--file", type=str, default=None, help="file containing initial prompt to load",dest="file")
81
+ parser.add_argument("-n", "--n_predict", type=int, default=128, help="number of tokens to predict (-1 = infinity)",dest="n_predict")
82
+ parser.add_argument("--n_parts", type=int, default=-1, help="number of model parts", dest="n_parts")
73
83
  parser.add_argument("-c", "--ctx_size", type=int, default=512, help="size of the prompt context",dest="n_ctx")
74
- parser.add_argument("--memory_f32", action="store_false", help="use f32 instead of f16 for memory key+value",dest="memory_f16")
75
- parser.add_argument("--top_p", type=float, default=0.95, help="top-p samplin",dest="top_p")
84
+ parser.add_argument("-b", "--batch_size", type=int, default=8, help="batch size for prompt processing",dest="n_batch")
85
+ parser.add_argument("--keep", type=int, default=0, help="number of tokens to keep from the initial prompt",dest="n_keep")
86
+
87
+ parser.add_argument(
88
+ "-l",
89
+ "--logit-bias",
90
+ type=str,
91
+ action='append',
92
+ help="--logit-bias TOKEN_ID(+/-)BIAS",
93
+ dest="logit_bias_str"
94
+ )
95
+ parser.add_argument("--ignore-eos", action="store_true", help="ignore end of stream token and continue generating", dest="ignore_eos")
76
96
  parser.add_argument("--top_k", type=int, default=40, help="top-k sampling",dest="top_k")
97
+ parser.add_argument("--top_p", type=float, default=0.95, help="top-p sampling",dest="top_p")
98
+ parser.add_argument("--tfs", type=float, default=1.0, help="tail free sampling, parameter z (1.0 = disabled)",dest="tfs_z")
77
99
  parser.add_argument("--temp", type=float, default=0.80, help="temperature",dest="temp")
78
- parser.add_argument("--n_predict", type=int, default=128, help="number of tokens to predict (-1 = infinity)",dest="n_predict")
79
- parser.add_argument("--repeat_last_n", type=int, default=64, help="last n tokens to consider for penalize ",dest="repeat_last_n")
80
100
  parser.add_argument("--repeat_penalty", type=float, default=1.10, help="penalize repeat sequence of tokens",dest="repeat_penalty")
81
- parser.add_argument("-b", "--batch_size", type=int, default=8, help="batch size for prompt processing",dest="n_batch")
82
- parser.add_argument("--keep", type=int, default=0, help="number of tokens to keep from the initial prompt",dest="n_keep")
101
+ parser.add_argument("--repeat_last_n", type=int, default=64, help="last n tokens to consider for penalize ",dest="repeat_last_n")
102
+ parser.add_argument("--frequency_penalty", type=float, default=0.0, help="repeat alpha frequency penalty (0.0 = disabled)",dest="tfs_z")
103
+ parser.add_argument("--presence_penalty", type=float, default=0.0, help="repeat alpha presence penalty (0.0 = disabled)",dest="presence_penalty")
104
+ parser.add_argument("--mirostat", type=float, default=1.0, help="use Mirostat sampling.",dest="mirostat")
105
+ parser.add_argument("--mirostat_ent", type=float, default=5.0, help="Mirostat target entropy, parameter tau represents the average surprise value",dest="mirostat_tau")
106
+ parser.add_argument("--mirostat_lr", type=float, default=0.1, help="Mirostat learning rate, parameter eta",dest="mirostat_eta")
107
+
83
108
  parser.add_argument("-m", "--model", type=str, default="./models/llama-7B/ggml-model.bin", help="model path",dest="model")
109
+ parser.add_argument("-p", "--prompt", type=str, default="", help="initial prompt",dest="prompt")
110
+ parser.add_argument("-f", "--file", type=str, default=None, help="file containing initial prompt to load",dest="file")
111
+ parser.add_argument("--session", type=str, default=None, help="file to cache model state in (may be large!)",dest="path_session")
112
+ parser.add_argument("--in-prefix", type=str, default="", help="string to prefix user inputs with", dest="input_prefix")
113
+ parser.add_argument("--in-suffix", type=str, default="", help="append to input", dest="input_suffix")
84
114
  parser.add_argument(
85
- "-i", "--interactive", action="store_true", help="run in interactive mode", dest="interactive"
115
+ "-r",
116
+ "--reverse-prompt",
117
+ type=str,
118
+ action='append',
119
+ help="poll user input upon seeing PROMPT (can be\nspecified more than once for multiple prompts).",
120
+ dest="antiprompt"
86
121
  )
87
- parser.add_argument("--embedding", action="store_true", help="", dest="embedding")
122
+
123
+ parser.add_argument("--lora", type=str, default="", help="apply LoRA adapter (implies --no-mmap)", dest="lora_adapter")
124
+ parser.add_argument("--lora-base", type=str, default="", help="optional model to use as a base for the layers modified by the LoRA adapter", dest="lora_base")
125
+
126
+ parser.add_argument("--memory_f32", action="store_false", help="use f32 instead of f16 for memory key+value",dest="memory_f16")
127
+ parser.add_argument("--random-prompt", action="store_true", help="start with a randomized prompt.", dest="random_prompt")
88
128
  parser.add_argument(
89
- "--interactive-start",
129
+ "--color",
90
130
  action="store_true",
91
- help="run in interactive mode",
92
- dest="interactive"
131
+ help="colorise output to distinguish prompt and user input from generations",
132
+ dest="use_color"
93
133
  )
134
+ parser.add_argument(
135
+ "-i", "--interactive", action="store_true", help="run in interactive mode", dest="interactive"
136
+ )
137
+
138
+ parser.add_argument("--embedding", action="store_true", help="", dest="embedding")
94
139
  parser.add_argument(
95
140
  "--interactive-first",
96
141
  action="store_true",
97
142
  help="run in interactive mode and wait for input right away",
98
143
  dest="interactive_start"
99
144
  )
145
+
100
146
  parser.add_argument(
101
147
  "-ins",
102
148
  "--instruct",
@@ -104,34 +150,39 @@ def gpt_params_parse(argv = None, params: Optional[GptParams] = None):
104
150
  help="run in instruction mode (use with Alpaca or Vicuna models)",
105
151
  dest="instruct"
106
152
  )
107
- parser.add_argument(
108
- "--color",
109
- action="store_true",
110
- help="colorise output to distinguish prompt and user input from generations",
111
- dest="use_color"
112
- )
113
- parser.add_argument("--mlock", action="store_true",help="force system to keep model in RAM rather than swapping or compressing",dest="use_mlock")
153
+ parser.add_argument("--no-penalize-nl", action="store_false", help="do not penalize newline token", dest="penalize_nl")
154
+ parser.add_argument("--perplexity", action="store_true", help="compute perplexity over the prompt", dest="perplexity")
114
155
  parser.add_argument("--no-mmap", action="store_false",help="do not memory-map model (slower load but may reduce pageouts if not using mlock)",dest="use_mmap")
156
+ parser.add_argument("--mlock", action="store_true",help="force system to keep model in RAM rather than swapping or compressing",dest="use_mlock")
115
157
  parser.add_argument("--mtest", action="store_true",help="compute maximum memory usage",dest="mem_test")
116
158
  parser.add_argument("--verbose-prompt", action="store_true",help="print prompt before generation",dest="verbose_prompt")
117
- parser.add_argument(
118
- "-r",
119
- "--reverse-prompt",
120
- type=str,
121
- action='append',
122
- help="poll user input upon seeing PROMPT (can be\nspecified more than once for multiple prompts).",
123
- dest="antiprompt"
124
- )
125
- parser.add_argument("--perplexity", action="store_true", help="compute perplexity over the prompt", dest="perplexity")
126
- parser.add_argument("--ignore-eos", action="store_true", help="ignore end of stream token and continue generating", dest="ignore_eos")
127
- parser.add_argument("--n_parts", type=int, default=-1, help="number of model parts", dest="n_parts")
128
- parser.add_argument("--random-prompt", action="store_true", help="start with a randomized prompt.", dest="random_prompt")
129
- parser.add_argument("--in-prefix", type=str, default="", help="string to prefix user inputs with", dest="input_prefix")
159
+
160
+ # Custom args
130
161
  parser.add_argument("--fix-prefix", type=str, default="", help="append to input when generated n_predict tokens", dest="fix_prefix")
131
- parser.add_argument("--out-postfix", type=str, default="", help="append to input", dest="output_postfix")
132
162
  parser.add_argument("--input-noecho", action="store_false", help="dont output the input", dest="input_echo")
163
+
164
+ parser.add_argument(
165
+ "--interactive-start",
166
+ action="store_true",
167
+ help="run in interactive mode",
168
+ dest="interactive"
169
+ )
170
+
133
171
  args = parser.parse_args(argv)
134
- return args
172
+
173
+ logit_bias_str = args.logit_bias_str
174
+ delattr(args, "logit_bias_str")
175
+ params = GptParams(**vars(args))
176
+
177
+ if (params.lora_adapter):
178
+ params.use_mmap = False
179
+
180
+ if (logit_bias_str != None):
181
+ for i in logit_bias_str:
182
+ if (m := re.match(r"(\d+)([-+]\d+)", i)):
183
+ params.logit_bias[int(m.group(1))] = float(m.group(2))
184
+
185
+ return params
135
186
 
136
187
  def gpt_random_prompt(rng):
137
188
  return [
@@ -148,4 +199,4 @@ def gpt_random_prompt(rng):
148
199
  ][rng % 10]
149
200
 
150
201
  if __name__ == "__main__":
151
- print(GptParams(gpt_params_parse()))
202
+ print(gpt_params_parse())