@fugood/llama.node 0.6.3 → 1.0.0-beta.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
Files changed (377)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
package/src/llama.cpp/examples/save-load-state/save-load-state.cpp
@@ -1,246 +0,0 @@
- #include "arg.h"
- #include "common.h"
- #include "llama.h"
-
- #include <vector>
- #include <cstdio>
-
- int main(int argc, char ** argv) {
-     common_params params;
-
-     params.prompt = "The quick brown fox";
-     params.sampling.seed = 1234;
-
-     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
-         return 1;
-     }
-
-     common_init();
-
-     if (params.n_predict < 0) {
-         params.n_predict = 16;
-     }
-
-     auto n_past = 0;
-
-     std::string result0;
-     std::string result1;
-     std::string result2;
-
-     // init
-     common_init_result llama_init = common_init_from_params(params);
-
-     llama_model * model = llama_init.model.get();
-     llama_context * ctx = llama_init.context.get();
-
-     if (model == nullptr || ctx == nullptr) {
-         fprintf(stderr, "%s : failed to init\n", __func__);
-         return 1;
-     }
-
-     auto sparams = llama_sampler_chain_default_params();
-
-     llama_sampler * smpl = llama_sampler_chain_init(sparams);
-
-     llama_sampler_chain_add(smpl, llama_sampler_init_dist(params.sampling.seed));
-
-     // tokenize prompt
-     auto tokens = common_tokenize(ctx, params.prompt, true);
-
-     // prepare the batch
-     llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
-     for (size_t i = 0; i < tokens.size(); i++) {
-         common_batch_add(batch, tokens[i], i, {0}, false);
-     }
-     batch.logits[batch.n_tokens - 1] = true; // generate next token
-
-     // evaluate prompt
-     llama_decode(ctx, batch);
-     n_past += batch.n_tokens;
-
-     // save state (rng, logits, embedding and kv_cache) to file
-     {
-         std::vector<uint8_t> state_mem(llama_state_get_size(ctx));
-         const size_t written = llama_state_get_data(ctx, state_mem.data(), state_mem.size());
-
-         FILE *fp_write = fopen("dump_state.bin", "wb");
-         fwrite(state_mem.data(), 1, written, fp_write);
-         fclose(fp_write);
-
-         fprintf(stderr, "%s : serialized state into %zd out of a maximum of %zd bytes\n", __func__, written, state_mem.size());
-     }
-
-     // save state (last tokens)
-     const auto n_past_saved = n_past;
-
-     // first run
-     printf("\nfirst run: %s", params.prompt.c_str());
-
-     for (auto i = 0; i < params.n_predict; i++) {
-         auto next_token = llama_sampler_sample(smpl, ctx, -1);
-         auto next_token_str = common_token_to_piece(ctx, next_token);
-
-         printf("%s", next_token_str.c_str());
-         result0 += next_token_str;
-
-         common_batch_clear(batch);
-         common_batch_add(batch, next_token, n_past, {0}, true);
-
-         if (llama_decode(ctx, batch)) {
-             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
-             llama_batch_free(batch);
-             return 1;
-         }
-         n_past += 1;
-     }
-
-     printf("\n\n");
-
-     // make new context
-     llama_context * ctx2 = llama_init_from_model(model, common_context_params_to_llama(params));
-
-     llama_sampler * smpl2 = llama_sampler_chain_init(sparams);
-
-     llama_sampler_chain_add(smpl2, llama_sampler_init_dist(params.sampling.seed));
-
-     printf("\nsecond run: %s", params.prompt.c_str());
-
-     // load state (rng, logits, embedding and kv_cache) from file
-     {
-         std::vector<uint8_t> state_mem;
-
-         FILE * fp_read = fopen("dump_state.bin", "rb");
-         fseek(fp_read, 0, SEEK_END);
-         state_mem.resize(ftell(fp_read));
-         fseek(fp_read, 0, SEEK_SET);
-         const size_t read = fread(state_mem.data(), 1, state_mem.size(), fp_read);
-         fclose(fp_read);
-
-         if (read != llama_state_set_data(ctx2, state_mem.data(), state_mem.size())) {
-             fprintf(stderr, "\n%s : failed to read state\n", __func__);
-             return 1;
-         }
-
-         fprintf(stderr, "%s : deserialized state from %zd out of a maximum of %zd bytes\n", __func__, read, state_mem.size());
-     }
-
-     // restore state (last tokens)
-     n_past = n_past_saved;
-
-     // second run
-     for (auto i = 0; i < params.n_predict; i++) {
-         auto next_token = llama_sampler_sample(smpl2, ctx2, -1);
-         auto next_token_str = common_token_to_piece(ctx2, next_token);
-
-         printf("%s", next_token_str.c_str());
-         result1 += next_token_str;
-
-         common_batch_clear(batch);
-         common_batch_add(batch, next_token, n_past, {0}, true);
-
-         if (llama_decode(ctx2, batch)) {
-             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
-             llama_batch_free(batch);
-             return 1;
-         }
-         n_past += 1;
-     }
-
-     printf("\n\n");
-
-     if (result0 != result1) {
-         fprintf(stderr, "\n%s : error : the 2 generations are different\n", __func__);
-         return 1;
-     }
-
-     // make new context
-     llama_context * ctx3 = llama_init_from_model(model, common_context_params_to_llama(params));
-
-     llama_sampler * smpl3 = llama_sampler_chain_init(sparams);
-
-     llama_sampler_chain_add(smpl3, llama_sampler_init_dist(params.sampling.seed));
-
-     printf("\nsingle seq run: %s", params.prompt.c_str());
-
-     // load state (rng, logits, embedding and kv_cache) from file
-     {
-         std::vector<uint8_t> state_mem;
-
-         FILE * fp_read = fopen("dump_state.bin", "rb");
-         fseek(fp_read, 0, SEEK_END);
-         state_mem.resize(ftell(fp_read));
-         fseek(fp_read, 0, SEEK_SET);
-         const size_t read = fread(state_mem.data(), 1, state_mem.size(), fp_read);
-         fclose(fp_read);
-
-         if (read != llama_state_set_data(ctx3, state_mem.data(), state_mem.size())) {
-             fprintf(stderr, "\n%s : failed to read state\n", __func__);
-             return 1;
-         }
-
-         fprintf(stderr, "%s : deserialized state from %zd out of a maximum of %zd bytes\n", __func__, read, state_mem.size());
-     }
-
-     // restore state (last tokens)
-     n_past = n_past_saved;
-
-     // save seq 0 and load into seq 1
-     {
-         // save kv of seq 0
-         std::vector<uint8_t> seq_store(llama_state_seq_get_size(ctx3, 0));
-         const size_t ncopy = llama_state_seq_get_data(ctx3, seq_store.data(), seq_store.size(), 0);
-         if (ncopy != seq_store.size()) {
-             fprintf(stderr, "\n%s : seq copy data length %zd does not match expected length %zd\n", __func__, ncopy, seq_store.size());
-             return 1;
-         }
-         fprintf(stderr, "%s : seq 0 copied, %zd bytes\n", __func__, ncopy);
-
-         // erase whole kv
-         llama_kv_self_clear(ctx3);
-         fprintf(stderr, "%s : kv cache cleared\n", __func__);
-
-         // restore kv into seq 1
-         const size_t nset = llama_state_seq_set_data(ctx3, seq_store.data(), seq_store.size(), 1);
-         if (nset != seq_store.size()) {
-             fprintf(stderr, "\n%s : seq set data length %zd does not match expected length %zd\n", __func__, nset, seq_store.size());
-             return 1;
-         }
-         fprintf(stderr, "%s : seq 1 restored, %zd bytes\n", __func__, nset);
-     }
-
-     // third run with seq 1 instead of 0
-     for (auto i = 0; i < params.n_predict; i++) {
-         auto next_token = llama_sampler_sample(smpl3, ctx3, -1);
-         auto next_token_str = common_token_to_piece(ctx3, next_token);
-
-         printf("%s", next_token_str.c_str());
-         result2 += next_token_str;
-
-         common_batch_clear(batch);
-         common_batch_add(batch, next_token, n_past, {1}, true);
-
-         if (llama_decode(ctx3, batch)) {
-             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
-             llama_batch_free(batch);
-             return 1;
-         }
-         n_past += 1;
-     }
-
-     printf("\n");
-
-     llama_sampler_free(smpl);
-     llama_sampler_free(smpl2);
-     llama_sampler_free(smpl3);
-
-     llama_batch_free(batch);
-
-     if (result0 != result2) {
-         fprintf(stderr, "\n%s : error : the seq restore generation is different\n", __func__);
-         return 1;
-     }
-
-     fprintf(stderr, "\n%s : success\n", __func__);
-
-     return 0;
- }
package/src/llama.cpp/examples/simple/CMakeLists.txt
@@ -1,5 +0,0 @@
- set(TARGET llama-simple)
- add_executable(${TARGET} simple.cpp)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
package/src/llama.cpp/examples/simple/simple.cpp
@@ -1,206 +0,0 @@
- #include "llama.h"
- #include <cstdio>
- #include <cstring>
- #include <string>
- #include <vector>
-
- static void print_usage(int, char ** argv) {
-     printf("\nexample usage:\n");
-     printf("\n %s -m model.gguf [-n n_predict] [-ngl n_gpu_layers] [prompt]\n", argv[0]);
-     printf("\n");
- }
-
- int main(int argc, char ** argv) {
-     // path to the model gguf file
-     std::string model_path;
-     // prompt to generate text from
-     std::string prompt = "Hello my name is";
-     // number of layers to offload to the GPU
-     int ngl = 99;
-     // number of tokens to predict
-     int n_predict = 32;
-
-     // parse command line arguments
-
-     {
-         int i = 1;
-         for (; i < argc; i++) {
-             if (strcmp(argv[i], "-m") == 0) {
-                 if (i + 1 < argc) {
-                     model_path = argv[++i];
-                 } else {
-                     print_usage(argc, argv);
-                     return 1;
-                 }
-             } else if (strcmp(argv[i], "-n") == 0) {
-                 if (i + 1 < argc) {
-                     try {
-                         n_predict = std::stoi(argv[++i]);
-                     } catch (...) {
-                         print_usage(argc, argv);
-                         return 1;
-                     }
-                 } else {
-                     print_usage(argc, argv);
-                     return 1;
-                 }
-             } else if (strcmp(argv[i], "-ngl") == 0) {
-                 if (i + 1 < argc) {
-                     try {
-                         ngl = std::stoi(argv[++i]);
-                     } catch (...) {
-                         print_usage(argc, argv);
-                         return 1;
-                     }
-                 } else {
-                     print_usage(argc, argv);
-                     return 1;
-                 }
-             } else {
-                 // prompt starts here
-                 break;
-             }
-         }
-         if (model_path.empty()) {
-             print_usage(argc, argv);
-             return 1;
-         }
-         if (i < argc) {
-             prompt = argv[i++];
-             for (; i < argc; i++) {
-                 prompt += " ";
-                 prompt += argv[i];
-             }
-         }
-     }
-
-     // load dynamic backends
-
-     ggml_backend_load_all();
-
-     // initialize the model
-
-     llama_model_params model_params = llama_model_default_params();
-     model_params.n_gpu_layers = ngl;
-
-     llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);
-
-     if (model == NULL) {
-         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
-         return 1;
-     }
-
-     const llama_vocab * vocab = llama_model_get_vocab(model);
-     // tokenize the prompt
-
-     // find the number of tokens in the prompt
-     const int n_prompt = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, true, true);
-
-     // allocate space for the tokens and tokenize the prompt
-     std::vector<llama_token> prompt_tokens(n_prompt);
-     if (llama_tokenize(vocab, prompt.c_str(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), true, true) < 0) {
-         fprintf(stderr, "%s: error: failed to tokenize the prompt\n", __func__);
-         return 1;
-     }
-
-     // initialize the context
-
-     llama_context_params ctx_params = llama_context_default_params();
-     // n_ctx is the context size
-     ctx_params.n_ctx = n_prompt + n_predict - 1;
-     // n_batch is the maximum number of tokens that can be processed in a single call to llama_decode
-     ctx_params.n_batch = n_prompt;
-     // enable performance counters
-     ctx_params.no_perf = false;
-
-     llama_context * ctx = llama_init_from_model(model, ctx_params);
-
-     if (ctx == NULL) {
-         fprintf(stderr , "%s: error: failed to create the llama_context\n" , __func__);
-         return 1;
-     }
-
-     // initialize the sampler
-
-     auto sparams = llama_sampler_chain_default_params();
-     sparams.no_perf = false;
-     llama_sampler * smpl = llama_sampler_chain_init(sparams);
-
-     llama_sampler_chain_add(smpl, llama_sampler_init_greedy());
-
-     // print the prompt token-by-token
-
-     for (auto id : prompt_tokens) {
-         char buf[128];
-         int n = llama_token_to_piece(vocab, id, buf, sizeof(buf), 0, true);
-         if (n < 0) {
-             fprintf(stderr, "%s: error: failed to convert token to piece\n", __func__);
-             return 1;
-         }
-         std::string s(buf, n);
-         printf("%s", s.c_str());
-     }
-
-     // prepare a batch for the prompt
-
-     llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size());
-
-     // main loop
-
-     const auto t_main_start = ggml_time_us();
-     int n_decode = 0;
-     llama_token new_token_id;
-
-     for (int n_pos = 0; n_pos + batch.n_tokens < n_prompt + n_predict; ) {
-         // evaluate the current batch with the transformer model
-         if (llama_decode(ctx, batch)) {
-             fprintf(stderr, "%s : failed to eval, return code %d\n", __func__, 1);
-             return 1;
-         }
-
-         n_pos += batch.n_tokens;
-
-         // sample the next token
-         {
-             new_token_id = llama_sampler_sample(smpl, ctx, -1);
-
-             // is it an end of generation?
-             if (llama_vocab_is_eog(vocab, new_token_id)) {
-                 break;
-             }
-
-             char buf[128];
-             int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true);
-             if (n < 0) {
-                 fprintf(stderr, "%s: error: failed to convert token to piece\n", __func__);
-                 return 1;
-             }
-             std::string s(buf, n);
-             printf("%s", s.c_str());
-             fflush(stdout);
-
-             // prepare the next batch with the sampled token
-             batch = llama_batch_get_one(&new_token_id, 1);
-
-             n_decode += 1;
-         }
-     }
-
-     printf("\n");
-
-     const auto t_main_end = ggml_time_us();
-
-     fprintf(stderr, "%s: decoded %d tokens in %.2f s, speed: %.2f t/s\n",
-             __func__, n_decode, (t_main_end - t_main_start) / 1000000.0f, n_decode / ((t_main_end - t_main_start) / 1000000.0f));
-
-     fprintf(stderr, "\n");
-     llama_perf_sampler_print(smpl);
-     llama_perf_context_print(ctx);
-     fprintf(stderr, "\n");
-
-     llama_sampler_free(smpl);
-     llama_free(ctx);
-     llama_model_free(model);
-
-     return 0;
- }
package/src/llama.cpp/examples/simple-chat/CMakeLists.txt
@@ -1,5 +0,0 @@
- set(TARGET llama-simple-chat)
- add_executable(${TARGET} simple-chat.cpp)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
package/src/llama.cpp/examples/simple-chat/simple-chat.cpp
@@ -1,206 +0,0 @@
- #include "llama.h"
- #include <cstdio>
- #include <cstring>
- #include <iostream>
- #include <string>
- #include <vector>
-
- static void print_usage(int, char ** argv) {
-     printf("\nexample usage:\n");
-     printf("\n %s -m model.gguf [-c context_size] [-ngl n_gpu_layers]\n", argv[0]);
-     printf("\n");
- }
-
- int main(int argc, char ** argv) {
-     std::string model_path;
-     int ngl = 99;
-     int n_ctx = 2048;
-
-     // parse command line arguments
-     for (int i = 1; i < argc; i++) {
-         try {
-             if (strcmp(argv[i], "-m") == 0) {
-                 if (i + 1 < argc) {
-                     model_path = argv[++i];
-                 } else {
-                     print_usage(argc, argv);
-                     return 1;
-                 }
-             } else if (strcmp(argv[i], "-c") == 0) {
-                 if (i + 1 < argc) {
-                     n_ctx = std::stoi(argv[++i]);
-                 } else {
-                     print_usage(argc, argv);
-                     return 1;
-                 }
-             } else if (strcmp(argv[i], "-ngl") == 0) {
-                 if (i + 1 < argc) {
-                     ngl = std::stoi(argv[++i]);
-                 } else {
-                     print_usage(argc, argv);
-                     return 1;
-                 }
-             } else {
-                 print_usage(argc, argv);
-                 return 1;
-             }
-         } catch (std::exception & e) {
-             fprintf(stderr, "error: %s\n", e.what());
-             print_usage(argc, argv);
-             return 1;
-         }
-     }
-     if (model_path.empty()) {
-         print_usage(argc, argv);
-         return 1;
-     }
-
-     // only print errors
-     llama_log_set([](enum ggml_log_level level, const char * text, void * /* user_data */) {
-         if (level >= GGML_LOG_LEVEL_ERROR) {
-             fprintf(stderr, "%s", text);
-         }
-     }, nullptr);
-
-     // load dynamic backends
-     ggml_backend_load_all();
-
-     // initialize the model
-     llama_model_params model_params = llama_model_default_params();
-     model_params.n_gpu_layers = ngl;
-
-     llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);
-     if (!model) {
-         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
-         return 1;
-     }
-
-     const llama_vocab * vocab = llama_model_get_vocab(model);
-
-     // initialize the context
-     llama_context_params ctx_params = llama_context_default_params();
-     ctx_params.n_ctx = n_ctx;
-     ctx_params.n_batch = n_ctx;
-
-     llama_context * ctx = llama_init_from_model(model, ctx_params);
-     if (!ctx) {
-         fprintf(stderr , "%s: error: failed to create the llama_context\n" , __func__);
-         return 1;
-     }
-
-     // initialize the sampler
-     llama_sampler * smpl = llama_sampler_chain_init(llama_sampler_chain_default_params());
-     llama_sampler_chain_add(smpl, llama_sampler_init_min_p(0.05f, 1));
-     llama_sampler_chain_add(smpl, llama_sampler_init_temp(0.8f));
-     llama_sampler_chain_add(smpl, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
-
-     // helper function to evaluate a prompt and generate a response
-     auto generate = [&](const std::string & prompt) {
-         std::string response;
-
-         const bool is_first = llama_kv_self_seq_pos_max(ctx, 0) == 0;
-
-         // tokenize the prompt
-         const int n_prompt_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, is_first, true);
-         std::vector<llama_token> prompt_tokens(n_prompt_tokens);
-         if (llama_tokenize(vocab, prompt.c_str(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), is_first, true) < 0) {
-             GGML_ABORT("failed to tokenize the prompt\n");
-         }
-
-         // prepare a batch for the prompt
-         llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size());
-         llama_token new_token_id;
-         while (true) {
-             // check if we have enough space in the context to evaluate this batch
-             int n_ctx = llama_n_ctx(ctx);
-             int n_ctx_used = llama_kv_self_seq_pos_max(ctx, 0);
-             if (n_ctx_used + batch.n_tokens > n_ctx) {
-                 printf("\033[0m\n");
-                 fprintf(stderr, "context size exceeded\n");
-                 exit(0);
-             }
-
-             if (llama_decode(ctx, batch)) {
-                 GGML_ABORT("failed to decode\n");
-             }
-
-             // sample the next token
-             new_token_id = llama_sampler_sample(smpl, ctx, -1);
-
-             // is it an end of generation?
-             if (llama_vocab_is_eog(vocab, new_token_id)) {
-                 break;
-             }
-
-             // convert the token to a string, print it and add it to the response
-             char buf[256];
-             int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true);
-             if (n < 0) {
-                 GGML_ABORT("failed to convert token to piece\n");
-             }
-             std::string piece(buf, n);
-             printf("%s", piece.c_str());
-             fflush(stdout);
-             response += piece;
-
-             // prepare the next batch with the sampled token
-             batch = llama_batch_get_one(&new_token_id, 1);
-         }
-
-         return response;
-     };
-
-     std::vector<llama_chat_message> messages;
-     std::vector<char> formatted(llama_n_ctx(ctx));
-     int prev_len = 0;
-     while (true) {
-         // get user input
-         printf("\033[32m> \033[0m");
-         std::string user;
-         std::getline(std::cin, user);
-
-         if (user.empty()) {
-             break;
-         }
-
-         const char * tmpl = llama_model_chat_template(model, /* name */ nullptr);
-
-         // add the user input to the message list and format it
-         messages.push_back({"user", strdup(user.c_str())});
-         int new_len = llama_chat_apply_template(tmpl, messages.data(), messages.size(), true, formatted.data(), formatted.size());
-         if (new_len > (int)formatted.size()) {
-             formatted.resize(new_len);
-             new_len = llama_chat_apply_template(tmpl, messages.data(), messages.size(), true, formatted.data(), formatted.size());
-         }
-         if (new_len < 0) {
-             fprintf(stderr, "failed to apply the chat template\n");
-             return 1;
-         }
-
-         // remove previous messages to obtain the prompt to generate the response
-         std::string prompt(formatted.begin() + prev_len, formatted.begin() + new_len);
-
-         // generate a response
-         printf("\033[33m");
-         std::string response = generate(prompt);
-         printf("\n\033[0m");
-
-         // add the response to the messages
-         messages.push_back({"assistant", strdup(response.c_str())});
-         prev_len = llama_chat_apply_template(tmpl, messages.data(), messages.size(), false, nullptr, 0);
-         if (prev_len < 0) {
-             fprintf(stderr, "failed to apply the chat template\n");
-             return 1;
-         }
-     }
-
-     // free resources
-     for (auto & msg : messages) {
-         free(const_cast<char *>(msg.content));
-     }
-     llama_sampler_free(smpl);
-     llama_free(ctx);
-     llama_model_free(model);
-
-     return 0;
- }
package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt
@@ -1,11 +0,0 @@
- cmake_minimum_required(VERSION 3.12)
- project(llama-simple-cmake-pkg)
-
- set(TARGET llama-simple-cmake-pkg)
-
- find_package(Llama REQUIRED)
-
- add_executable(${TARGET} ${CMAKE_CURRENT_LIST_DIR}/../simple/simple.cpp)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE llama ggml::all ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
package/src/llama.cpp/examples/speculative/CMakeLists.txt
@@ -1,5 +0,0 @@
- set(TARGET llama-speculative)
- add_executable(${TARGET} speculative.cpp)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)