@fugood/llama.node 0.6.3 → 1.0.0-beta.2

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (377)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +40 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
package/src/llama.cpp/tools/run/run.cpp
@@ -1,1261 +0,0 @@
- #if defined(_WIN32)
- # include <windows.h>
- # include <io.h>
- #else
- # include <sys/file.h>
- # include <sys/ioctl.h>
- # include <unistd.h>
- #endif
-
- #if defined(LLAMA_USE_CURL)
- # include <curl/curl.h>
- #endif
-
- #include <signal.h>
-
- #include <climits>
- #include <cstdarg>
- #include <cstdio>
- #include <cstring>
- #include <filesystem>
- #include <iostream>
- #include <list>
- #include <sstream>
- #include <string>
- #include <vector>
-
- #include "chat.h"
- #include "common.h"
- #include "json.hpp"
- #include "linenoise.cpp/linenoise.h"
- #include "llama-cpp.h"
- #include "log.h"
-
- #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
- [[noreturn]] static void sigint_handler(int) {
- printf("\n" LOG_COL_DEFAULT);
- exit(0); // not ideal, but it's the only way to guarantee exit in all cases
- }
- #endif
-
- GGML_ATTRIBUTE_FORMAT(1, 2)
- static int printe(const char * fmt, ...) {
- va_list args;
- va_start(args, fmt);
- const int ret = vfprintf(stderr, fmt, args);
- va_end(args);
-
- return ret;
- }
-
- static std::string strftime_fmt(const char * fmt, const std::tm & tm) {
- std::ostringstream oss;
- oss << std::put_time(&tm, fmt);
-
- return oss.str();
- }
-
- class Opt {
- public:
- int init(int argc, const char ** argv) {
- ctx_params = llama_context_default_params();
- model_params = llama_model_default_params();
- context_size_default = ctx_params.n_batch;
- n_threads_default = ctx_params.n_threads;
- ngl_default = model_params.n_gpu_layers;
- common_params_sampling sampling;
- temperature_default = sampling.temp;
-
- if (argc < 2) {
- printe("Error: No arguments provided.\n");
- print_help();
- return 1;
- }
-
- // Parse arguments
- if (parse(argc, argv)) {
- printe("Error: Failed to parse arguments.\n");
- print_help();
- return 1;
- }
-
- // If help is requested, show help and exit
- if (help) {
- print_help();
- return 2;
- }
-
- ctx_params.n_batch = context_size >= 0 ? context_size : context_size_default;
- ctx_params.n_ctx = ctx_params.n_batch;
- ctx_params.n_threads = ctx_params.n_threads_batch = n_threads >= 0 ? n_threads : n_threads_default;
- model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
- temperature = temperature >= 0 ? temperature : temperature_default;
-
- return 0; // Success
- }
-
- llama_context_params ctx_params;
- llama_model_params model_params;
- std::string model_;
- std::string chat_template_file;
- std::string user;
- bool use_jinja = false;
- int context_size = -1, ngl = -1, n_threads = -1;
- float temperature = -1;
- bool verbose = false;
-
- private:
- int context_size_default = -1, ngl_default = -1, n_threads_default = -1;
- float temperature_default = -1;
- bool help = false;
-
- bool parse_flag(const char ** argv, int i, const char * short_opt, const char * long_opt) {
- return strcmp(argv[i], short_opt) == 0 || strcmp(argv[i], long_opt) == 0;
- }
-
- int handle_option_with_value(int argc, const char ** argv, int & i, int & option_value) {
- if (i + 1 >= argc) {
- return 1;
- }
-
- option_value = std::atoi(argv[++i]);
-
- return 0;
- }
-
- int handle_option_with_value(int argc, const char ** argv, int & i, float & option_value) {
- if (i + 1 >= argc) {
- return 1;
- }
-
- option_value = std::atof(argv[++i]);
-
- return 0;
- }
-
- int handle_option_with_value(int argc, const char ** argv, int & i, std::string & option_value) {
- if (i + 1 >= argc) {
- return 1;
- }
-
- option_value = argv[++i];
-
- return 0;
- }
-
- int parse_options_with_value(int argc, const char ** argv, int & i, bool & options_parsing) {
- if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
- if (handle_option_with_value(argc, argv, i, context_size) == 1) {
- return 1;
- }
- } else if (options_parsing &&
- (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "-ngl") == 0 || strcmp(argv[i], "--ngl") == 0)) {
- if (handle_option_with_value(argc, argv, i, ngl) == 1) {
- return 1;
- }
- } else if (options_parsing && (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--threads") == 0)) {
- if (handle_option_with_value(argc, argv, i, n_threads) == 1) {
- return 1;
- }
- } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
- if (handle_option_with_value(argc, argv, i, temperature) == 1) {
- return 1;
- }
- } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0) {
- if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
- return 1;
- }
- use_jinja = true;
- } else {
- return 2;
- }
-
- return 0;
- }
-
- int parse_options(const char ** argv, int & i, bool & options_parsing) {
- if (options_parsing && (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
- verbose = true;
- } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
- use_jinja = true;
- } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
- help = true;
- return 0;
- } else if (options_parsing && strcmp(argv[i], "--") == 0) {
- options_parsing = false;
- } else {
- return 2;
- }
-
- return 0;
- }
-
- int parse_positional_args(const char ** argv, int & i, int & positional_args_i) {
- if (positional_args_i == 0) {
- if (!argv[i][0] || argv[i][0] == '-') {
- return 1;
- }
-
- ++positional_args_i;
- model_ = argv[i];
- } else if (positional_args_i == 1) {
- ++positional_args_i;
- user = argv[i];
- } else {
- user += " " + std::string(argv[i]);
- }
-
- return 0;
- }
-
- int parse(int argc, const char ** argv) {
- bool options_parsing = true;
- for (int i = 1, positional_args_i = 0; i < argc; ++i) {
- int ret = parse_options_with_value(argc, argv, i, options_parsing);
- if (ret == 0) {
- continue;
- } else if (ret == 1) {
- return ret;
- }
-
- ret = parse_options(argv, i, options_parsing);
- if (ret == 0) {
- continue;
- } else if (ret == 1) {
- return ret;
- }
-
- if (parse_positional_args(argv, i, positional_args_i)) {
- return 1;
- }
- }
-
- if (model_.empty()) {
- return 1;
- }
-
- return 0;
- }
-
- void print_help() const {
- printf(
- "Description:\n"
- " Runs a llm\n"
- "\n"
- "Usage:\n"
- " llama-run [options] model [prompt]\n"
- "\n"
- "Options:\n"
- " -c, --context-size <value>\n"
- " Context size (default: %d)\n"
- " --chat-template-file <path>\n"
- " Path to the file containing the chat template to use with the model.\n"
- " Only supports jinja templates and implicitly sets the --jinja flag.\n"
- " --jinja\n"
- " Use jinja templating for the chat template of the model\n"
- " -n, -ngl, --ngl <value>\n"
- " Number of GPU layers (default: %d)\n"
- " --temp <value>\n"
- " Temperature (default: %.1f)\n"
- " -t, --threads <value>\n"
- " Number of threads to use during generation (default: %d)\n"
- " -v, --verbose, --log-verbose\n"
- " Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
- " -h, --help\n"
- " Show help message\n"
- "\n"
- "Commands:\n"
- " model\n"
- " Model is a string with an optional prefix of \n"
- " huggingface:// (hf://), modelscope:// (ms://), ollama://, https:// or file://.\n"
- " If no protocol is specified and a file exists in the specified\n"
- " path, file:// is assumed, otherwise if a file does not exist in\n"
- " the specified path, ollama:// is assumed. Models that are being\n"
- " pulled are downloaded with .partial extension while being\n"
- " downloaded and then renamed as the file without the .partial\n"
- " extension when complete.\n"
- "\n"
- "Examples:\n"
- " llama-run llama3\n"
- " llama-run ollama://granite-code\n"
- " llama-run ollama://smollm:135m\n"
- " llama-run hf://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
- " llama-run "
- "huggingface://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
- " llama-run ms://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
- " llama-run "
- "modelscope://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
- " llama-run https://example.com/some-file1.gguf\n"
- " llama-run some-file2.gguf\n"
- " llama-run file://some-file3.gguf\n"
- " llama-run --ngl 999 some-file4.gguf\n"
- " llama-run --ngl 999 some-file5.gguf Hello World\n",
- context_size_default, ngl_default, temperature_default, n_threads_default);
- }
- };
-
- struct progress_data {
- size_t file_size = 0;
- std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
- bool printed = false;
- };
-
- static int get_terminal_width() {
- #if defined(_WIN32)
- CONSOLE_SCREEN_BUFFER_INFO csbi;
- GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
- return csbi.srWindow.Right - csbi.srWindow.Left + 1;
- #else
- struct winsize w;
- ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
- return w.ws_col;
- #endif
- }
-
- class File {
- public:
- FILE * file = nullptr;
-
- FILE * open(const std::string & filename, const char * mode) {
- file = ggml_fopen(filename.c_str(), mode);
-
- return file;
- }
-
- int lock() {
- if (file) {
- # ifdef _WIN32
- fd = _fileno(file);
- hFile = (HANDLE) _get_osfhandle(fd);
- if (hFile == INVALID_HANDLE_VALUE) {
- fd = -1;
-
- return 1;
- }
-
- OVERLAPPED overlapped = {};
- if (!LockFileEx(hFile, LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY, 0, MAXDWORD, MAXDWORD,
- &overlapped)) {
- fd = -1;
-
- return 1;
- }
- # else
- fd = fileno(file);
- if (flock(fd, LOCK_EX | LOCK_NB) != 0) {
- fd = -1;
-
- return 1;
- }
- # endif
- }
-
- return 0;
- }
-
- std::string to_string() {
- fseek(file, 0, SEEK_END);
- const size_t size = ftell(file);
- fseek(file, 0, SEEK_SET);
- std::string out;
- out.resize(size);
- const size_t read_size = fread(&out[0], 1, size, file);
- if (read_size != size) {
- printe("Error reading file: %s", strerror(errno));
- }
-
- return out;
- }
-
- ~File() {
- if (fd >= 0) {
- # ifdef _WIN32
- if (hFile != INVALID_HANDLE_VALUE) {
- OVERLAPPED overlapped = {};
- UnlockFileEx(hFile, 0, MAXDWORD, MAXDWORD, &overlapped);
- }
- # else
- flock(fd, LOCK_UN);
- # endif
- }
-
- if (file) {
- fclose(file);
- }
- }
-
- private:
- int fd = -1;
- # ifdef _WIN32
- HANDLE hFile = nullptr;
- # endif
- };
-
- #ifdef LLAMA_USE_CURL
- class HttpClient {
- public:
- int init(const std::string & url, const std::vector<std::string> & headers, const std::string & output_file,
- const bool progress, std::string * response_str = nullptr) {
- if (std::filesystem::exists(output_file)) {
- return 0;
- }
-
- std::string output_file_partial;
- curl = curl_easy_init();
- if (!curl) {
- return 1;
- }
-
- progress_data data;
- File out;
- if (!output_file.empty()) {
- output_file_partial = output_file + ".partial";
- if (!out.open(output_file_partial, "ab")) {
- printe("Failed to open file for writing\n");
-
- return 1;
- }
-
- if (out.lock()) {
- printe("Failed to exclusively lock file\n");
-
- return 1;
- }
- }
-
- set_write_options(response_str, out);
- data.file_size = set_resume_point(output_file_partial);
- set_progress_options(progress, data);
- set_headers(headers);
- CURLcode res = perform(url);
- if (res != CURLE_OK){
- printe("Fetching resource '%s' failed: %s\n", url.c_str(), curl_easy_strerror(res));
- return 1;
- }
- if (!output_file.empty()) {
- std::filesystem::rename(output_file_partial, output_file);
- }
-
- return 0;
- }
-
- ~HttpClient() {
- if (chunk) {
- curl_slist_free_all(chunk);
- }
-
- if (curl) {
- curl_easy_cleanup(curl);
- }
- }
-
- private:
- CURL * curl = nullptr;
- struct curl_slist * chunk = nullptr;
-
- void set_write_options(std::string * response_str, const File & out) {
- if (response_str) {
- curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, capture_data);
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, response_str);
- } else {
- curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, out.file);
- }
- }
-
- size_t set_resume_point(const std::string & output_file) {
- size_t file_size = 0;
- if (std::filesystem::exists(output_file)) {
- file_size = std::filesystem::file_size(output_file);
- curl_easy_setopt(curl, CURLOPT_RESUME_FROM_LARGE, static_cast<curl_off_t>(file_size));
- }
-
- return file_size;
- }
-
- void set_progress_options(bool progress, progress_data & data) {
- if (progress) {
- curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
- curl_easy_setopt(curl, CURLOPT_XFERINFODATA, &data);
- curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, update_progress);
- }
- }
-
- void set_headers(const std::vector<std::string> & headers) {
- if (!headers.empty()) {
- if (chunk) {
- curl_slist_free_all(chunk);
- chunk = 0;
- }
-
- for (const auto & header : headers) {
- chunk = curl_slist_append(chunk, header.c_str());
- }
-
- curl_easy_setopt(curl, CURLOPT_HTTPHEADER, chunk);
- }
- }
-
- CURLcode perform(const std::string & url) {
- curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
- curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
- curl_easy_setopt(curl, CURLOPT_DEFAULT_PROTOCOL, "https");
- curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L);
- return curl_easy_perform(curl);
- }
-
- static std::string human_readable_time(double seconds) {
- int hrs = static_cast<int>(seconds) / 3600;
- int mins = (static_cast<int>(seconds) % 3600) / 60;
- int secs = static_cast<int>(seconds) % 60;
-
- if (hrs > 0) {
- return string_format("%dh %02dm %02ds", hrs, mins, secs);
- } else if (mins > 0) {
- return string_format("%dm %02ds", mins, secs);
- } else {
- return string_format("%ds", secs);
- }
- }
-
- static std::string human_readable_size(curl_off_t size) {
- static const char * suffix[] = { "B", "KB", "MB", "GB", "TB" };
- char length = sizeof(suffix) / sizeof(suffix[0]);
- int i = 0;
- double dbl_size = size;
- if (size > 1024) {
- for (i = 0; (size / 1024) > 0 && i < length - 1; i++, size /= 1024) {
- dbl_size = size / 1024.0;
- }
- }
-
- return string_format("%.2f %s", dbl_size, suffix[i]);
- }
-
- static int update_progress(void * ptr, curl_off_t total_to_download, curl_off_t now_downloaded, curl_off_t,
- curl_off_t) {
- progress_data * data = static_cast<progress_data *>(ptr);
- if (total_to_download <= 0) {
- return 0;
- }
-
- total_to_download += data->file_size;
- const curl_off_t now_downloaded_plus_file_size = now_downloaded + data->file_size;
- const curl_off_t percentage = calculate_percentage(now_downloaded_plus_file_size, total_to_download);
- std::string progress_prefix = generate_progress_prefix(percentage);
-
- const double speed = calculate_speed(now_downloaded, data->start_time);
- const double tim = (total_to_download - now_downloaded) / speed;
- std::string progress_suffix =
- generate_progress_suffix(now_downloaded_plus_file_size, total_to_download, speed, tim);
-
- int progress_bar_width = calculate_progress_bar_width(progress_prefix, progress_suffix);
- std::string progress_bar;
- generate_progress_bar(progress_bar_width, percentage, progress_bar);
-
- print_progress(progress_prefix, progress_bar, progress_suffix);
- data->printed = true;
-
- return 0;
- }
-
- static curl_off_t calculate_percentage(curl_off_t now_downloaded_plus_file_size, curl_off_t total_to_download) {
- return (now_downloaded_plus_file_size * 100) / total_to_download;
- }
-
- static std::string generate_progress_prefix(curl_off_t percentage) {
- return string_format("%3ld%% |", static_cast<long int>(percentage));
- }
-
- static double calculate_speed(curl_off_t now_downloaded, const std::chrono::steady_clock::time_point & start_time) {
- const auto now = std::chrono::steady_clock::now();
- const std::chrono::duration<double> elapsed_seconds = now - start_time;
- return now_downloaded / elapsed_seconds.count();
- }
-
- static std::string generate_progress_suffix(curl_off_t now_downloaded_plus_file_size, curl_off_t total_to_download,
- double speed, double estimated_time) {
- const int width = 10;
- return string_format("%*s/%*s%*s/s%*s", width, human_readable_size(now_downloaded_plus_file_size).c_str(),
- width, human_readable_size(total_to_download).c_str(), width,
- human_readable_size(speed).c_str(), width, human_readable_time(estimated_time).c_str());
- }
-
- static int calculate_progress_bar_width(const std::string & progress_prefix, const std::string & progress_suffix) {
- int progress_bar_width = get_terminal_width() - progress_prefix.size() - progress_suffix.size() - 3;
- if (progress_bar_width < 1) {
- progress_bar_width = 1;
- }
-
- return progress_bar_width;
- }
-
- static std::string generate_progress_bar(int progress_bar_width, curl_off_t percentage,
- std::string & progress_bar) {
- const curl_off_t pos = (percentage * progress_bar_width) / 100;
- for (int i = 0; i < progress_bar_width; ++i) {
- progress_bar.append((i < pos) ? "█" : " ");
- }
-
- return progress_bar;
- }
-
- static void print_progress(const std::string & progress_prefix, const std::string & progress_bar,
- const std::string & progress_suffix) {
- printe("\r" LOG_CLR_TO_EOL "%s%s| %s", progress_prefix.c_str(), progress_bar.c_str(), progress_suffix.c_str());
- }
- // Function to write data to a file
- static size_t write_data(void * ptr, size_t size, size_t nmemb, void * stream) {
- FILE * out = static_cast<FILE *>(stream);
- return fwrite(ptr, size, nmemb, out);
- }
-
- // Function to capture data into a string
- static size_t capture_data(void * ptr, size_t size, size_t nmemb, void * stream) {
- std::string * str = static_cast<std::string *>(stream);
- str->append(static_cast<char *>(ptr), size * nmemb);
- return size * nmemb;
- }
- };
- #endif
-
- class LlamaData {
- public:
- llama_model_ptr model;
- llama_sampler_ptr sampler;
- llama_context_ptr context;
- std::vector<llama_chat_message> messages; // TODO: switch to common_chat_msg
- std::list<std::string> msg_strs;
- std::vector<char> fmtted;
-
- int init(Opt & opt) {
- model = initialize_model(opt);
- if (!model) {
- return 1;
- }
-
- context = initialize_context(model, opt);
- if (!context) {
- return 1;
- }
-
- sampler = initialize_sampler(opt);
-
- return 0;
- }
-
- private:
- #ifdef LLAMA_USE_CURL
- int download(const std::string & url, const std::string & output_file, const bool progress,
- const std::vector<std::string> & headers = {}, std::string * response_str = nullptr) {
- HttpClient http;
- if (http.init(url, headers, output_file, progress, response_str)) {
- return 1;
- }
-
- return 0;
- }
- #else
- int download(const std::string &, const std::string &, const bool, const std::vector<std::string> & = {},
- std::string * = nullptr) {
- printe("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
-
- return 1;
- }
- #endif
-
- // Helper function to handle model tag extraction and URL construction
- std::pair<std::string, std::string> extract_model_and_tag(std::string & model, const std::string & base_url) {
- std::string model_tag = "latest";
- const size_t colon_pos = model.find(':');
- if (colon_pos != std::string::npos) {
- model_tag = model.substr(colon_pos + 1);
- model = model.substr(0, colon_pos);
- }
-
- std::string url = base_url + model + "/manifests/" + model_tag;
-
- return { model, url };
- }
-
- // Helper function to download and parse the manifest
- int download_and_parse_manifest(const std::string & url, const std::vector<std::string> & headers,
- nlohmann::json & manifest) {
- std::string manifest_str;
- int ret = download(url, "", false, headers, &manifest_str);
- if (ret) {
- return ret;
- }
-
- manifest = nlohmann::json::parse(manifest_str);
-
- return 0;
- }
-
- int dl_from_endpoint(std::string & model_endpoint, std::string & model, const std::string & bn) {
- // Find the second occurrence of '/' after protocol string
- size_t pos = model.find('/');
- pos = model.find('/', pos + 1);
- std::string hfr, hff;
- std::vector<std::string> headers = { "User-Agent: llama-cpp", "Accept: application/json" };
- std::string url;
-
- if (pos == std::string::npos) {
- auto [model_name, manifest_url] = extract_model_and_tag(model, model_endpoint + "v2/");
- hfr = model_name;
-
- nlohmann::json manifest;
- int ret = download_and_parse_manifest(manifest_url, headers, manifest);
- if (ret) {
- return ret;
- }
-
- hff = manifest["ggufFile"]["rfilename"];
- } else {
- hfr = model.substr(0, pos);
- hff = model.substr(pos + 1);
- }
-
- url = model_endpoint + hfr + "/resolve/main/" + hff;
-
- return download(url, bn, true, headers);
- }
-
- int modelscope_dl(std::string & model, const std::string & bn) {
- std::string model_endpoint = "https://modelscope.cn/models/";
- return dl_from_endpoint(model_endpoint, model, bn);
- }
-
- int huggingface_dl(std::string & model, const std::string & bn) {
- std::string model_endpoint = get_model_endpoint();
- return dl_from_endpoint(model_endpoint, model, bn);
- }
-
- int ollama_dl(std::string & model, const std::string & bn) {
- const std::vector<std::string> headers = { "Accept: application/vnd.docker.distribution.manifest.v2+json" };
- if (model.find('/') == std::string::npos) {
- model = "library/" + model;
- }
-
- auto [model_name, manifest_url] = extract_model_and_tag(model, "https://registry.ollama.ai/v2/");
- nlohmann::json manifest;
- int ret = download_and_parse_manifest(manifest_url, {}, manifest);
- if (ret) {
- return ret;
- }
-
- std::string layer;
- for (const auto & l : manifest["layers"]) {
- if (l["mediaType"] == "application/vnd.ollama.image.model") {
- layer = l["digest"];
- break;
- }
- }
-
- std::string blob_url = "https://registry.ollama.ai/v2/" + model_name + "/blobs/" + layer;
-
- return download(blob_url, bn, true, headers);
- }
-
- int github_dl(const std::string & model, const std::string & bn) {
- std::string repository = model;
- std::string branch = "main";
- const size_t at_pos = model.find('@');
- if (at_pos != std::string::npos) {
- repository = model.substr(0, at_pos);
- branch = model.substr(at_pos + 1);
- }
-
- const std::vector<std::string> repo_parts = string_split(repository, "/");
- if (repo_parts.size() < 3) {
- printe("Invalid GitHub repository format\n");
- return 1;
- }
-
- const std::string & org = repo_parts[0];
- const std::string & project = repo_parts[1];
- std::string url = "https://raw.githubusercontent.com/" + org + "/" + project + "/" + branch;
- for (size_t i = 2; i < repo_parts.size(); ++i) {
- url += "/" + repo_parts[i];
- }
-
- return download(url, bn, true);
- }
-
- int s3_dl(const std::string & model, const std::string & bn) {
- const size_t slash_pos = model.find('/');
- if (slash_pos == std::string::npos) {
- return 1;
- }
-
- const std::string bucket = model.substr(0, slash_pos);
- const std::string key = model.substr(slash_pos + 1);
- const char * access_key = std::getenv("AWS_ACCESS_KEY_ID");
- const char * secret_key = std::getenv("AWS_SECRET_ACCESS_KEY");
- if (!access_key || !secret_key) {
- printe("AWS credentials not found in environment\n");
- return 1;
- }
-
- // Generate AWS Signature Version 4 headers
- // (Implementation requires HMAC-SHA256 and date handling)
- // Get current timestamp
- const time_t now = time(nullptr);
- const tm tm = *gmtime(&now);
- const std::string date = strftime_fmt("%Y%m%d", tm);
- const std::string datetime = strftime_fmt("%Y%m%dT%H%M%SZ", tm);
- const std::vector<std::string> headers = {
- "Authorization: AWS4-HMAC-SHA256 Credential=" + std::string(access_key) + "/" + date +
- "/us-east-1/s3/aws4_request",
- "x-amz-content-sha256: UNSIGNED-PAYLOAD", "x-amz-date: " + datetime
- };
-
- const std::string url = "https://" + bucket + ".s3.amazonaws.com/" + key;
-
- return download(url, bn, true, headers);
- }
-
- std::string basename(const std::string & path) {
- const size_t pos = path.find_last_of("/\\");
- if (pos == std::string::npos) {
- return path;
- }
-
- return path.substr(pos + 1);
- }
-
- int rm_until_substring(std::string & model_, const std::string & substring) {
- const std::string::size_type pos = model_.find(substring);
- if (pos == std::string::npos) {
- return 1;
- }
-
- model_ = model_.substr(pos + substring.size()); // Skip past the substring
- return 0;
- }
-
- int resolve_model(std::string & model_) {
- int ret = 0;
- if (string_starts_with(model_, "file://") || std::filesystem::exists(model_)) {
- rm_until_substring(model_, "://");
-
- return ret;
- }
-
- const std::string bn = basename(model_);
- if (string_starts_with(model_, "hf://") || string_starts_with(model_, "huggingface://") ||
- string_starts_with(model_, "hf.co/")) {
- rm_until_substring(model_, "hf.co/");
- rm_until_substring(model_, "://");
- ret = huggingface_dl(model_, bn);
- } else if (string_starts_with(model_, "ms://") || string_starts_with(model_, "modelscope://")) {
- rm_until_substring(model_, "://");
- ret = modelscope_dl(model_, bn);
- } else if ((string_starts_with(model_, "https://") || string_starts_with(model_, "http://")) &&
- !string_starts_with(model_, "https://ollama.com/library/")) {
- ret = download(model_, bn, true);
- } else if (string_starts_with(model_, "github:") || string_starts_with(model_, "github://")) {
- rm_until_substring(model_, "github:");
- rm_until_substring(model_, "://");
- ret = github_dl(model_, bn);
- } else if (string_starts_with(model_, "s3://")) {
- rm_until_substring(model_, "://");
- ret = s3_dl(model_, bn);
- } else { // ollama:// or nothing
- rm_until_substring(model_, "ollama.com/library/");
- rm_until_substring(model_, "://");
- ret = ollama_dl(model_, bn);
- }
-
- model_ = bn;
-
- return ret;
- }
-
- // Initializes the model and returns a unique pointer to it
- llama_model_ptr initialize_model(Opt & opt) {
- ggml_backend_load_all();
- resolve_model(opt.model_);
- printe("\r" LOG_CLR_TO_EOL "Loading model");
- llama_model_ptr model(llama_model_load_from_file(opt.model_.c_str(), opt.model_params));
- if (!model) {
- printe("%s: error: unable to load model from file: %s\n", __func__, opt.model_.c_str());
- }
-
- printe("\r" LOG_CLR_TO_EOL);
- return model;
- }
-
- // Initializes the context with the specified parameters
- llama_context_ptr initialize_context(const llama_model_ptr & model, const Opt & opt) {
- llama_context_ptr context(llama_init_from_model(model.get(), opt.ctx_params));
- if (!context) {
- printe("%s: error: failed to create the llama_context\n", __func__);
- }
-
- return context;
- }
-
- // Initializes and configures the sampler
- llama_sampler_ptr initialize_sampler(const Opt & opt) {
901
- llama_sampler_ptr sampler(llama_sampler_chain_init(llama_sampler_chain_default_params()));
902
- llama_sampler_chain_add(sampler.get(), llama_sampler_init_min_p(0.05f, 1));
903
- llama_sampler_chain_add(sampler.get(), llama_sampler_init_temp(opt.temperature));
904
- llama_sampler_chain_add(sampler.get(), llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
905
-
906
- return sampler;
907
- }
908
- };
909
-
- // Add a message to `messages` and store its content in `msg_strs`
- static void add_message(const char * role, const std::string & text, LlamaData & llama_data) {
-     llama_data.msg_strs.push_back(std::move(text));
-     llama_data.messages.push_back({ role, llama_data.msg_strs.back().c_str() });
- }
-
- // Function to apply the chat template and resize `formatted` if needed
- static int apply_chat_template(const struct common_chat_templates * tmpls, LlamaData & llama_data, const bool append, bool use_jinja) {
-     common_chat_templates_inputs inputs;
-     for (const auto & msg : llama_data.messages) {
-         common_chat_msg cmsg;
-         cmsg.role = msg.role;
-         cmsg.content = msg.content;
-         inputs.messages.push_back(cmsg);
-     }
-     inputs.add_generation_prompt = append;
-     inputs.use_jinja = use_jinja;
-
-     auto chat_params = common_chat_templates_apply(tmpls, inputs);
-     // TODO: use other params for tool calls.
-     auto result = chat_params.prompt;
-     llama_data.fmtted.resize(result.size() + 1);
-     memcpy(llama_data.fmtted.data(), result.c_str(), result.size() + 1);
-     return result.size();
- }
-
- // Function to tokenize the prompt
- static int tokenize_prompt(const llama_vocab * vocab, const std::string & prompt,
-                            std::vector<llama_token> & prompt_tokens, const LlamaData & llama_data) {
-     const bool is_first = llama_kv_self_seq_pos_max(llama_data.context.get(), 0) == 0;
-
-     const int n_prompt_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, is_first, true);
-     prompt_tokens.resize(n_prompt_tokens);
-     if (llama_tokenize(vocab, prompt.c_str(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), is_first,
-                        true) < 0) {
-         printe("failed to tokenize the prompt\n");
-         return -1;
-     }
-
-     return n_prompt_tokens;
- }
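
tokenize_prompt above relies on a size-query idiom: when llama_tokenize is called with a null output buffer it returns the negated number of tokens it would produce, so the caller can size the vector and then tokenize for real; is_first only adds special (BOS) tokens while the KV cache is still empty. A minimal sketch of the same idiom in isolation (illustrative only, assumes llama.h):

    // Sketch of the size-query idiom used by tokenize_prompt above.
    #include "llama.h"
    #include <string>
    #include <vector>

    std::vector<llama_token> tokenize_text(const llama_vocab * vocab, const std::string & text,
                                           bool add_special) {
        // with a null output buffer, llama_tokenize returns the negated required token count
        const int n_needed = -llama_tokenize(vocab, text.c_str(), text.size(),
                                             /*tokens=*/nullptr, /*n_tokens_max=*/0,
                                             add_special, /*parse_special=*/true);
        std::vector<llama_token> tokens(n_needed > 0 ? n_needed : 0);
        if (n_needed > 0 &&
            llama_tokenize(vocab, text.c_str(), text.size(), tokens.data(), tokens.size(),
                           add_special, true) < 0) {
            tokens.clear();  // tokenization failed
        }
        return tokens;
    }
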
-
- // Check if we have enough space in the context to evaluate this batch
- static int check_context_size(const llama_context_ptr & ctx, const llama_batch & batch) {
-     const int n_ctx = llama_n_ctx(ctx.get());
-     const int n_ctx_used = llama_kv_self_seq_pos_max(ctx.get(), 0);
-     if (n_ctx_used + batch.n_tokens > n_ctx) {
-         printf(LOG_COL_DEFAULT "\n");
-         printe("context size exceeded\n");
-         return 1;
-     }
-
-     return 0;
- }
-
- // convert the token to a string
- static int convert_token_to_string(const llama_vocab * vocab, const llama_token token_id, std::string & piece) {
-     char buf[256];
-     int n = llama_token_to_piece(vocab, token_id, buf, sizeof(buf), 0, true);
-     if (n < 0) {
-         printe("failed to convert token to piece\n");
-         return 1;
-     }
-
-     piece = std::string(buf, n);
-     return 0;
- }
-
- static void print_word_and_concatenate_to_response(const std::string & piece, std::string & response) {
-     printf("%s", piece.c_str());
-     fflush(stdout);
-     response += piece;
- }
-
- // helper function to evaluate a prompt and generate a response
- static int generate(LlamaData & llama_data, const std::string & prompt, std::string & response) {
-     const llama_vocab * vocab = llama_model_get_vocab(llama_data.model.get());
-
-     std::vector<llama_token> tokens;
-     if (tokenize_prompt(vocab, prompt, tokens, llama_data) < 0) {
-         return 1;
-     }
-
-     // prepare a batch for the prompt
-     llama_batch batch = llama_batch_get_one(tokens.data(), tokens.size());
-     llama_token new_token_id;
-     while (true) {
-         check_context_size(llama_data.context, batch);
-         if (llama_decode(llama_data.context.get(), batch)) {
-             printe("failed to decode\n");
-             return 1;
-         }
-
-         // sample the next token, check is it an end of generation?
-         new_token_id = llama_sampler_sample(llama_data.sampler.get(), llama_data.context.get(), -1);
-         if (llama_vocab_is_eog(vocab, new_token_id)) {
-             break;
-         }
-
-         std::string piece;
-         if (convert_token_to_string(vocab, new_token_id, piece)) {
-             return 1;
-         }
-
-         print_word_and_concatenate_to_response(piece, response);
-
-         // prepare the next batch with the sampled token
-         batch = llama_batch_get_one(&new_token_id, 1);
-     }
-
-     printf(LOG_COL_DEFAULT);
-     return 0;
- }
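
generate() above decodes incrementally: the whole prompt goes in as one batch, then each sampled token is fed back as a single-token batch until an end-of-generation token is sampled. A condensed sketch of that loop, illustrative only, assuming an already-initialized model, context and sampler and omitting the error reporting shown above:

    // Condensed sketch of the decode/sample loop implemented by generate() above.
    #include "llama.h"
    #include <string>
    #include <vector>

    std::string run_completion(llama_model * model, llama_context * ctx, llama_sampler * smpl,
                               std::vector<llama_token> prompt_tokens) {
        const llama_vocab * vocab = llama_model_get_vocab(model);
        std::string out;

        // first batch: the whole prompt; later batches: one sampled token each
        llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size());
        llama_token tok;
        while (true) {
            if (llama_decode(ctx, batch) != 0) {
                break;                                      // decode failed
            }
            tok = llama_sampler_sample(smpl, ctx, -1);      // sample from the last logits
            if (llama_vocab_is_eog(vocab, tok)) {
                break;                                      // end-of-generation token
            }
            char buf[256];
            const int n = llama_token_to_piece(vocab, tok, buf, sizeof(buf), 0, true);
            if (n > 0) {
                out.append(buf, n);
            }
            batch = llama_batch_get_one(&tok, 1);           // feed the sampled token back in
        }
        return out;
    }
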
-
- static int read_user_input(std::string & user_input) {
-     static const char * prompt_prefix_env = std::getenv("LLAMA_PROMPT_PREFIX");
-     static const char * prompt_prefix = prompt_prefix_env ? prompt_prefix_env : "> ";
- #ifdef WIN32
-     printf("\r" LOG_CLR_TO_EOL LOG_COL_DEFAULT "%s", prompt_prefix);
-
-     std::getline(std::cin, user_input);
-     if (std::cin.eof()) {
-         printf("\n");
-         return 1;
-     }
- #else
-     std::unique_ptr<char, decltype(&std::free)> line(const_cast<char *>(linenoise(prompt_prefix)), free);
-     if (!line) {
-         return 1;
-     }
-
-     user_input = line.get();
- #endif
-
-     if (user_input == "/bye") {
-         return 1;
-     }
-
-     if (user_input.empty()) {
-         return 2;
-     }
-
- #ifndef WIN32
-     linenoiseHistoryAdd(line.get());
- #endif
-
-     return 0; // Should have data in happy path
- }
-
- // Function to generate a response based on the prompt
- static int generate_response(LlamaData & llama_data, const std::string & prompt, std::string & response,
-                              const bool stdout_a_terminal) {
-     // Set response color
-     if (stdout_a_terminal) {
-         printf(LOG_COL_YELLOW);
-     }
-
-     if (generate(llama_data, prompt, response)) {
-         printe("failed to generate response\n");
-         return 1;
-     }
-
-     // End response with color reset and newline
-     printf("\n%s", stdout_a_terminal ? LOG_COL_DEFAULT : "");
-     return 0;
- }
-
- // Helper function to apply the chat template and handle errors
- static int apply_chat_template_with_error_handling(const common_chat_templates * tmpls, LlamaData & llama_data, const bool append, int & output_length, bool use_jinja) {
-     const int new_len = apply_chat_template(tmpls, llama_data, append, use_jinja);
-     if (new_len < 0) {
-         printe("failed to apply the chat template\n");
-         return -1;
-     }
-
-     output_length = new_len;
-     return 0;
- }
-
- // Helper function to handle user input
- static int handle_user_input(std::string & user_input, const std::string & user) {
-     if (!user.empty()) {
-         user_input = user;
-         return 0; // No need for interactive input
-     }
-
-     return read_user_input(user_input); // Returns true if input ends the loop
- }
-
- static bool is_stdin_a_terminal() {
- #if defined(_WIN32)
-     HANDLE hStdin = GetStdHandle(STD_INPUT_HANDLE);
-     DWORD mode;
-     return GetConsoleMode(hStdin, &mode);
- #else
-     return isatty(STDIN_FILENO);
- #endif
- }
-
- static bool is_stdout_a_terminal() {
- #if defined(_WIN32)
-     HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);
-     DWORD mode;
-     return GetConsoleMode(hStdout, &mode);
- #else
-     return isatty(STDOUT_FILENO);
- #endif
- }
-
- // Function to handle user input
- static int get_user_input(std::string & user_input, const std::string & user) {
-     while (true) {
-         const int ret = handle_user_input(user_input, user);
-         if (ret == 1) {
-             return 1;
-         }
-
-         if (ret == 2) {
-             continue;
-         }
-
-         break;
-     }
-
-     return 0;
- }
-
- // Reads a chat template file to be used
- static std::string read_chat_template_file(const std::string & chat_template_file) {
-     File file;
-     if (!file.open(chat_template_file, "r")) {
-         printe("Error opening chat template file '%s': %s", chat_template_file.c_str(), strerror(errno));
-         return "";
-     }
-
-     return file.to_string();
- }
-
- static int process_user_message(const Opt & opt, const std::string & user_input, LlamaData & llama_data,
-                                 const common_chat_templates_ptr & chat_templates, int & prev_len,
-                                 const bool stdout_a_terminal) {
-     add_message("user", opt.user.empty() ? user_input : opt.user, llama_data);
-     int new_len;
-     if (apply_chat_template_with_error_handling(chat_templates.get(), llama_data, true, new_len, opt.use_jinja) < 0) {
-         return 1;
-     }
-
-     std::string prompt(llama_data.fmtted.begin() + prev_len, llama_data.fmtted.begin() + new_len);
-     std::string response;
-     if (generate_response(llama_data, prompt, response, stdout_a_terminal)) {
-         return 1;
-     }
-
-     if (!opt.user.empty()) {
-         return 2;
-     }
-
-     add_message("assistant", response, llama_data);
-     if (apply_chat_template_with_error_handling(chat_templates.get(), llama_data, false, prev_len, opt.use_jinja) < 0) {
-         return 1;
-     }
-
-     return 0;
- }
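
process_user_message re-applies the chat template over the full message history, but only the slice between the previous formatted length (prev_len) and the new one (new_len) is handed to generate_response, so earlier turns are not re-decoded. A toy illustration of that slicing, with plain strings standing in for the real template output (not llama.cpp code):

    // Toy illustration of the prev_len/new_len bookkeeping used above:
    // the formatted transcript only grows, and each turn decodes just the new tail.
    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
        std::vector<char> fmtted;                                  // stands in for llama_data.fmtted
        auto apply_template = [&](const std::string & full_chat) { // stands in for apply_chat_template
            fmtted.assign(full_chat.begin(), full_chat.end());
            return (int) full_chat.size();
        };

        int prev_len = 0;
        int new_len  = apply_template("user: hi\nassistant:");      // after the first user turn
        std::printf("decode: %s\n", std::string(fmtted.begin() + prev_len,
                                                fmtted.begin() + new_len).c_str());

        prev_len = apply_template("user: hi\nassistant: hello\n");  // after the assistant reply
        new_len  = apply_template("user: hi\nassistant: hello\nuser: how are you?\nassistant:");
        std::printf("decode: %s\n", std::string(fmtted.begin() + prev_len,
                                                fmtted.begin() + new_len).c_str());
        return 0;
    }
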
-
- // Main chat loop function
- static int chat_loop(LlamaData & llama_data, const Opt & opt) {
-     int prev_len = 0;
-     llama_data.fmtted.resize(llama_n_ctx(llama_data.context.get()));
-     std::string chat_template;
-     if (!opt.chat_template_file.empty()) {
-         chat_template = read_chat_template_file(opt.chat_template_file);
-     }
-
-     common_chat_templates_ptr chat_templates = common_chat_templates_init(llama_data.model.get(), chat_template);
-     static const bool stdout_a_terminal = is_stdout_a_terminal();
-     while (true) {
-         // Get user input
-         std::string user_input;
-         if (get_user_input(user_input, opt.user) == 1) {
-             return 0;
-         }
-
-         const int ret = process_user_message(opt, user_input, llama_data, chat_templates, prev_len, stdout_a_terminal);
-         if (ret == 1) {
-             return 1;
-         } else if (ret == 2) {
-             break;
-         }
-     }
-
-     return 0;
- }
-
- static void log_callback(const enum ggml_log_level level, const char * text, void * p) {
-     const Opt * opt = static_cast<Opt *>(p);
-     if (opt->verbose || level == GGML_LOG_LEVEL_ERROR) {
-         printe("%s", text);
-     }
- }
-
- static std::string read_pipe_data() {
-     std::ostringstream result;
-     result << std::cin.rdbuf(); // Read all data from std::cin
-     return result.str();
- }
-
- static void ctrl_c_handling() {
- #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
-     struct sigaction sigint_action;
-     sigint_action.sa_handler = sigint_handler;
-     sigemptyset(&sigint_action.sa_mask);
-     sigint_action.sa_flags = 0;
-     sigaction(SIGINT, &sigint_action, NULL);
- #elif defined(_WIN32)
-     auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
-         return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false;
-     };
-     SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
- #endif
- }
-
- int main(int argc, const char ** argv) {
-     ctrl_c_handling();
-     Opt opt;
-     const int ret = opt.init(argc, argv);
-     if (ret == 2) {
-         return 0;
-     } else if (ret) {
-         return 1;
-     }
-
-     if (!is_stdin_a_terminal()) {
-         if (!opt.user.empty()) {
-             opt.user += "\n\n";
-         }
-
-         opt.user += read_pipe_data();
-     }
-
-     llama_log_set(log_callback, &opt);
-     LlamaData llama_data;
-     if (llama_data.init(opt)) {
-         return 1;
-     }
-
-     if (chat_loop(llama_data, opt)) {
-         return 1;
-     }
-
-     return 0;
- }