@fugood/llama.node 0.2.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319)
  1. package/CMakeLists.txt +6 -3
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +8 -1
  17. package/package.json +3 -3
  18. package/patches/llama.patch +12 -12
  19. package/src/DetokenizeWorker.cpp +1 -1
  20. package/src/LlamaContext.cpp +33 -1
  21. package/src/LlamaContext.h +1 -0
  22. package/src/llama.cpp/.github/workflows/bench.yml +310 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +1315 -0
  24. package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
  25. package/src/llama.cpp/.github/workflows/docker.yml +116 -0
  26. package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
  27. package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
  28. package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
  29. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
  30. package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
  31. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
  32. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
  36. package/src/llama.cpp/.github/workflows/server.yml +183 -0
  37. package/src/llama.cpp/CMakeLists.txt +91 -1245
  38. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
  39. package/src/llama.cpp/cmake/build-info.cmake +58 -0
  40. package/src/llama.cpp/cmake/git-vars.cmake +22 -0
  41. package/src/llama.cpp/common/CMakeLists.txt +4 -3
  42. package/src/llama.cpp/common/build-info.cpp.in +4 -0
  43. package/src/llama.cpp/common/common.cpp +1116 -877
  44. package/src/llama.cpp/common/common.h +191 -77
  45. package/src/llama.cpp/common/grammar-parser.cpp +118 -31
  46. package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
  47. package/src/llama.cpp/common/log.h +1 -1
  48. package/src/llama.cpp/common/ngram-cache.h +10 -3
  49. package/src/llama.cpp/common/sampling.cpp +19 -10
  50. package/src/llama.cpp/docs/build.md +353 -0
  51. package/src/llama.cpp/examples/CMakeLists.txt +22 -22
  52. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
  54. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched/batched.cpp +52 -55
  56. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
  58. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/chat-13B.bat +57 -0
  60. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  61. package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
  62. package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
  63. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
  64. package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
  65. package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
  66. package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
  67. package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
  68. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
  69. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
  71. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
  72. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
  73. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
  75. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
  76. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
  77. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
  79. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
  80. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
  81. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
  82. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
  83. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
  84. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
  85. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
  86. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
  87. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
  88. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
  90. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  91. package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
  92. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
  94. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  95. package/src/llama.cpp/examples/infill/infill.cpp +38 -153
  96. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
  97. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
  98. package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
  99. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
  100. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
  101. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
  102. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
  103. package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
  104. package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
  105. package/src/llama.cpp/examples/llava/clip.cpp +23 -14
  106. package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
  107. package/src/llama.cpp/examples/llava/requirements.txt +3 -2
  108. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
  110. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  111. package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
  112. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  113. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
  114. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  115. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  116. package/src/llama.cpp/examples/main/main.cpp +98 -75
  117. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
  118. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  119. package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
  120. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  121. package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
  122. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  123. package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
  124. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  125. package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
  126. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  127. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  128. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  129. package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
  130. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  131. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
  132. package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
  133. package/src/llama.cpp/examples/server/server.cpp +274 -671
  134. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  135. package/src/llama.cpp/examples/server/utils.hpp +28 -29
  136. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  137. package/src/llama.cpp/examples/simple/simple.cpp +21 -29
  138. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  139. package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
  140. package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
  141. package/src/llama.cpp/examples/sycl/build.sh +23 -0
  142. package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
  143. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
  144. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
  145. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  146. package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
  147. package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
  148. package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
  149. package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
  150. package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
  151. package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
  152. package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
  153. package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
  154. package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
  155. package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
  156. package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
  157. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
  158. package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
  159. package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
  160. package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
  161. package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
  162. package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
  163. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
  164. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
  165. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
  169. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
  170. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
  171. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
  172. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
  173. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
  174. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  175. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
  176. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
  177. package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
  178. package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
  179. package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
  180. package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
  181. package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
  182. package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
  183. package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
  184. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
  187. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
  188. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
  192. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
  195. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  197. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
  198. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  199. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
  200. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
  201. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
  202. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
  203. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
  204. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
  205. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
  208. package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
  209. package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
  210. package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
  211. package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
  212. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
  213. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
  214. package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
  215. package/src/llama.cpp/models/.editorconfig +1 -0
  216. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  217. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  220. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  221. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  223. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  226. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  227. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  228. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  229. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  230. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  231. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  232. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  233. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  235. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  236. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  241. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  242. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  249. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  252. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  255. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  256. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  257. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  258. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  259. package/src/llama.cpp/requirements/requirements-all.txt +12 -0
  260. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  261. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  262. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  263. package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
  264. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  265. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  266. package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  267. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  268. package/src/llama.cpp/requirements.txt +5 -4
  269. package/src/llama.cpp/scripts/build-info.sh +30 -0
  270. package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
  271. package/src/llama.cpp/src/CMakeLists.txt +33 -0
  272. package/src/llama.cpp/src/llama-grammar.cpp +539 -0
  273. package/src/llama.cpp/src/llama-grammar.h +39 -0
  274. package/src/llama.cpp/src/llama-impl.h +26 -0
  275. package/src/llama.cpp/src/llama-sampling.cpp +635 -0
  276. package/src/llama.cpp/src/llama-sampling.h +56 -0
  277. package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
  278. package/src/llama.cpp/src/llama-vocab.h +130 -0
  279. package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
  280. package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
  281. package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
  282. package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
  283. package/src/llama.cpp/tests/CMakeLists.txt +19 -20
  284. package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
  285. package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
  286. package/src/llama.cpp/tests/test-double-float.cpp +2 -2
  287. package/src/llama.cpp/tests/test-grad0.cpp +2 -2
  288. package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
  289. package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
  290. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
  291. package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
  292. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
  293. package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
  294. package/src/llama.cpp/tests/test-rope.cpp +3 -4
  295. package/src/llama.cpp/tests/test-sampling.cpp +5 -5
  296. package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
  297. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
  298. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
  299. package/bin/darwin/arm64/default.metallib +0 -0
  300. package/bin/darwin/x64/default.metallib +0 -0
  301. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
  302. package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
  303. package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
  304. package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
  305. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
  306. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
  307. package/src/llama.cpp/ggml-opencl.cpp +0 -2305
  308. package/src/llama.cpp/ggml-opencl.h +0 -36
  309. package/src/llama.cpp/ggml-sycl.cpp +0 -17340
  310. package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
  311. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
  312. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
  313. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
  314. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
  315. /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
  316. /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
  317. /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
  318. /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
  319. /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts
@@ -0,0 +1,68 @@
+ plugins {
+ id("com.android.library")
+ id("org.jetbrains.kotlin.android")
+ }
+
+ android {
+ namespace = "android.llama.cpp"
+ compileSdk = 34
+
+ defaultConfig {
+ minSdk = 33
+
+ testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
+ consumerProguardFiles("consumer-rules.pro")
+ ndk {
+ // Add NDK properties if wanted, e.g.
+ // abiFilters += listOf("arm64-v8a")
+ }
+ externalNativeBuild {
+ cmake {
+ arguments += "-DCMAKE_BUILD_TYPE=Release"
+ cppFlags += listOf()
+ arguments += listOf()
+
+ cppFlags("")
+ }
+ }
+ }
+
+ buildTypes {
+ release {
+ isMinifyEnabled = false
+ proguardFiles(
+ getDefaultProguardFile("proguard-android-optimize.txt"),
+ "proguard-rules.pro"
+ )
+ }
+ }
+ externalNativeBuild {
+ cmake {
+ path("src/main/cpp/CMakeLists.txt")
+ version = "3.22.1"
+ }
+ }
+ compileOptions {
+ sourceCompatibility = JavaVersion.VERSION_1_8
+ targetCompatibility = JavaVersion.VERSION_1_8
+ }
+ kotlinOptions {
+ jvmTarget = "1.8"
+ }
+
+ packaging {
+ resources {
+ excludes += "/META-INF/{AL2.0,LGPL2.1}"
+ }
+ }
+ }
+
+ dependencies {
+
+ implementation("androidx.core:core-ktx:1.12.0")
+ implementation("androidx.appcompat:appcompat:1.6.1")
+ implementation("com.google.android.material:material:1.11.0")
+ testImplementation("junit:junit:4.13.2")
+ androidTestImplementation("androidx.test.ext:junit:1.1.5")
+ androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
+ }
package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt
@@ -11,15 +11,15 @@ cmake_minimum_required(VERSION 3.22.1)
  # build script scope).
  project("llama-android")

- include(FetchContent)
- FetchContent_Declare(
- llama
- GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
- GIT_TAG master
- )
+ #include(FetchContent)
+ #FetchContent_Declare(
+ # llama
+ # GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
+ # GIT_TAG master
+ #)

  # Also provides "common"
- FetchContent_MakeAvailable(llama)
+ #FetchContent_MakeAvailable(llama)

  # Creates and names a library, sets it as either STATIC
  # or SHARED, and provides the relative paths to its source code.
@@ -30,6 +30,10 @@ FetchContent_MakeAvailable(llama)
  # the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
  # is preferred for the same purpose.
  #
+
+ #load local llama.cpp
+ add_subdirectory(../../../../../../ build-llama)
+
  # In order to load a library into your app from Java/Kotlin, you must call
  # System.loadLibrary() and pass the name of the library defined here;
  # for GameActivity/NativeActivity derived applications, the same library name must be
package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp
@@ -5,7 +5,7 @@
  #include <string>
  #include <unistd.h>
  #include "llama.h"
- #include "common/common.h"
+ #include "common.h"

  // Write C++ code here.
  //
@@ -409,7 +409,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(

  const auto n_cur = env->CallIntMethod(intvar_ncur, la_int_var_value);
  if (llama_token_is_eog(model, new_token_id) || n_cur == n_len) {
- return env->NewStringUTF("");
+ return nullptr;
  }

  auto new_token_chars = llama_token_to_piece(context, new_token_id);
package/src/llama.cpp/examples/llama.android/settings.gradle.kts
@@ -0,0 +1,18 @@
+ pluginManagement {
+ repositories {
+ google()
+ mavenCentral()
+ gradlePluginPortal()
+ }
+ }
+ dependencyResolutionManagement {
+ repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
+ repositories {
+ google()
+ mavenCentral()
+ }
+ }
+
+ rootProject.name = "LlamaAndroid"
+ include(":app")
+ include(":llama")
package/src/llama.cpp/examples/llava/CMakeLists.txt
@@ -30,8 +30,9 @@ if(TARGET BUILD_INFO)
  add_dependencies(llava BUILD_INFO)
  endif()

- set(TARGET llava-cli)
- add_executable(llava-cli llava-cli.cpp)
- install(TARGETS llava-cli RUNTIME)
- target_link_libraries(llava-cli PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(llava PRIVATE cxx_std_11)
+ set(TARGET llama-llava-cli)
+ add_executable(${TARGET} llava-cli.cpp)
+ set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-cli)
+ install(TARGETS ${TARGET} RUNTIME)
+ target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
+ target_compile_features(${TARGET} PRIVATE cxx_std_11)
package/src/llama.cpp/examples/llava/android/build_64.sh
@@ -0,0 +1,8 @@
+ #!/bin/bash
+ cmake ../../../../ \
+ -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DANDROID_ABI="arm64-v8a" \
+ -DANDROID_PLATFORM=android-23 $1
+
+ make -j4
package/src/llama.cpp/examples/llava/clip.cpp
@@ -16,6 +16,10 @@
  #include "ggml-metal.h"
  #endif

+ #ifdef GGML_USE_CANN
+ #include "ggml-cann.h"
+ #endif
+
  #define STB_IMAGE_IMPLEMENTATION
  #include "stb_image.h"

@@ -865,7 +869,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
  embeddings = peg_0;
  }
  else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
  }
  }

@@ -1001,6 +1005,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  LOG_TEE("%s: CLIP using Metal backend\n", __func__);
  #endif

+ #ifdef GGML_USE_CANN
+ new_clip->backend = ggml_backend_cann_init(0);
+ LOG_TEE("%s: CLIP using CANN backend\n", __func__);
+ #endif
+

  if (!new_clip->backend) {
  new_clip->backend = ggml_backend_cpu_init();
@@ -1121,20 +1130,20 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  }
  if (n < 32)
  hparams.image_grid_pinpoints[n] = 0;
- } catch (std::runtime_error & e) {
+ } catch (std::runtime_error & /*e*/) {
  hparams.image_grid_pinpoints[0]=0;
  }

  try {
  int idx = get_key_idx(ctx, KEY_MM_PATCH_MERGE_TYPE);
  strcpy(hparams.mm_patch_merge_type, gguf_get_val_str(ctx, idx));
- } catch (std::runtime_error & e) {
+ } catch (std::runtime_error & /*e*/) {
  strcpy(hparams.mm_patch_merge_type, "flat");
  }

  try {
  hparams.image_crop_resolution = get_u32(ctx, KEY_IMAGE_CROP_RESOLUTION); // llava-1.6
- } catch(const std::exception& e) {
+ } catch(const std::exception& /*e*/) {
  hparams.image_crop_resolution = hparams.image_size;
  }

@@ -1173,7 +1182,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  try {
  vision_model.class_embedding = get_tensor(new_clip->ctx_data, TN_CLASS_EMBD);
  new_clip->has_class_embedding = true;
- } catch (const std::exception& e) {
+ } catch (const std::exception& /*e*/) {
  new_clip->has_class_embedding = false;
  }

@@ -1181,7 +1190,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight"));
  vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias"));
  new_clip->has_pre_norm = true;
- } catch (std::exception & e) {
+ } catch (std::exception & /*e*/) {
  new_clip->has_pre_norm = false;
  }

@@ -1189,21 +1198,21 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  vision_model.post_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "weight"));
  vision_model.post_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "bias"));
  new_clip->has_post_norm = true;
- } catch (std::exception & e) {
+ } catch (std::exception & /*e*/) {
  new_clip->has_post_norm = false;
  }

  try {
  vision_model.patch_bias = get_tensor(new_clip->ctx_data, TN_PATCH_BIAS);
  new_clip->has_patch_bias = true;
- } catch (std::exception & e) {
+ } catch (std::exception & /*e*/) {
  new_clip->has_patch_bias = false;
  }

  try {
  vision_model.patch_embeddings = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD);
  vision_model.position_embeddings = get_tensor(new_clip->ctx_data, format(TN_POS_EMBD, "v"));
- } catch(const std::exception& e) {
+ } catch(const std::exception& /*e*/) {
  LOG_TEE("%s: failed to load vision model tensors\n", __func__);
  }

@@ -1215,26 +1224,26 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  // Yi-type llava
  vision_model.mm_1_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "weight"));
  vision_model.mm_1_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "bias"));
- } catch (std::runtime_error & e) { }
+ } catch (std::runtime_error & /*e*/) { }
  try {
  // missing in Yi-type llava
  vision_model.mm_2_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "weight"));
  vision_model.mm_2_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "bias"));
- } catch (std::runtime_error & e) { }
+ } catch (std::runtime_error & /*e*/) { }
  try {
  // Yi-type llava
  vision_model.mm_3_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "weight"));
  vision_model.mm_3_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "bias"));
- } catch (std::runtime_error & e) { }
+ } catch (std::runtime_error & /*e*/) { }
  try {
  // Yi-type llava
  vision_model.mm_4_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "weight"));
  vision_model.mm_4_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "bias"));
- } catch (std::runtime_error & e) { }
+ } catch (std::runtime_error & /*e*/) { }
  try {
  vision_model.image_newline = get_tensor(new_clip->ctx_data, TN_IMAGE_NEWLINE);
  // LOG_TEE("%s: image_newline tensor (llava-1.6) found\n", __func__);
- } catch (std::runtime_error & e) { }
+ } catch (std::runtime_error & /*e*/) { }
  } else if (new_clip->proj_type == PROJECTOR_TYPE_LDP) {
  // MobileVLM projection
  vision_model.mm_model_mlp_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 1, "weight"));
package/src/llama.cpp/examples/llava/llava-cli.cpp
@@ -112,9 +112,12 @@ struct llava_context {
  struct llama_model * model = NULL;
  };

- static void show_additional_info(int /*argc*/, char ** argv) {
- LOG_TEE("\n example usage: %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
- LOG_TEE(" note: a lower temperature value like 0.1 is recommended for better quality.\n");
+ static void print_usage(int argc, char ** argv, const gpt_params & params) {
+ gpt_params_print_usage(argc, argv, params);
+
+ LOG_TEE("\n example usage:\n");
+ LOG_TEE("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
+ LOG_TEE("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
  }

  static struct llava_image_embed * load_image(llava_context * ctx_llava, gpt_params * params, const std::string & fname) {
@@ -278,7 +281,7 @@ int main(int argc, char ** argv) {
  gpt_params params;

  if (!gpt_params_parse(argc, argv, params)) {
- show_additional_info(argc, argv);
+ print_usage(argc, argv, params);
  return 1;
  }

@@ -290,8 +293,7 @@ int main(int argc, char ** argv) {
  #endif // LOG_DISABLE_LOGS

  if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
- gpt_params_print_usage(argc, argv, params);
- show_additional_info(argc, argv);
+ print_usage(argc, argv, {});
  return 1;
  }
  auto model = llava_init(&params);
package/src/llama.cpp/examples/llava/requirements.txt
@@ -1,3 +1,4 @@
- -r ../../requirements/requirements-convert.txt
+ -r ../../requirements/requirements-convert_legacy_llama.txt
+ --extra-index-url https://download.pytorch.org/whl/cpu
  pillow~=10.2.0
- torch~=2.1.1
+ torch~=2.2.1
package/src/llama.cpp/examples/lookahead/CMakeLists.txt
@@ -1,4 +1,4 @@
- set(TARGET lookahead)
+ set(TARGET llama-lookahead)
  add_executable(${TARGET} lookahead.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
package/src/llama.cpp/examples/lookahead/lookahead.cpp
@@ -37,7 +37,8 @@ struct ngram_container {
  int main(int argc, char ** argv) {
  gpt_params params;

- if (gpt_params_parse(argc, argv, params) == false) {
+ if (!gpt_params_parse(argc, argv, params)) {
+ gpt_params_print_usage(argc, argv, params);
  return 1;
  }

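
Note: this parse-then-print-usage change is applied uniformly across the example programs in this diff (lookahead, lookup-create, lookup-stats, lookup, llava-cli): when gpt_params_parse() rejects the arguments, the example now prints the generated usage text via gpt_params_print_usage() before exiting. A condensed sketch of the resulting entry-point shape, assuming llama.cpp's common library is on the include path (this is not the full example source):

    #include "common.h"   // gpt_params, gpt_params_parse, gpt_params_print_usage (llama.cpp "common" library)

    int main(int argc, char ** argv) {
        gpt_params params;

        // On a failed parse, show the auto-generated usage text instead of exiting silently.
        if (!gpt_params_parse(argc, argv, params)) {
            gpt_params_print_usage(argc, argv, params);
            return 1;
        }

        // ... example-specific work (model init, tokenization, decoding) follows here ...
        return 0;
    }
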
package/src/llama.cpp/examples/lookup/CMakeLists.txt
@@ -1,22 +1,22 @@
- set(TARGET lookup)
+ set(TARGET llama-lookup)
  add_executable(${TARGET} lookup.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
  target_compile_features(${TARGET} PRIVATE cxx_std_11)

- set(TARGET lookup-create)
+ set(TARGET llama-lookup-create)
  add_executable(${TARGET} lookup-create.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
  target_compile_features(${TARGET} PRIVATE cxx_std_11)

- set(TARGET lookup-merge)
+ set(TARGET llama-lookup-merge)
  add_executable(${TARGET} lookup-merge.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
  target_compile_features(${TARGET} PRIVATE cxx_std_11)

- set(TARGET lookup-stats)
+ set(TARGET llama-lookup-stats)
  add_executable(${TARGET} lookup-stats.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
package/src/llama.cpp/examples/lookup/lookup-create.cpp
@@ -14,8 +14,10 @@ int main(int argc, char ** argv){
  gpt_params params;

  if (!gpt_params_parse(argc, argv, params)) {
+ gpt_params_print_usage(argc, argv, params);
  return 1;
  }
+
  // init llama.cpp
  llama_backend_init();
  llama_numa_init(params.numa);
package/src/llama.cpp/examples/lookup/lookup-merge.cpp
@@ -11,14 +11,14 @@
  #include <unordered_map>
  #include <vector>

- static void print_usage() {
+ static void print_usage(char* argv0) {
  fprintf(stderr, "Merges multiple lookup cache files into a single one.\n");
- fprintf(stderr, "Usage: lookup-merge [--help] lookup_part_1.bin lookup_part_2.bin ... lookup_merged.bin\n");
+ fprintf(stderr, "Usage: %s [--help] lookup_part_1.bin lookup_part_2.bin ... lookup_merged.bin\n", argv0);
  }

  int main(int argc, char ** argv){
  if (argc < 3) {
- print_usage();
+ print_usage(argv[0]);
  exit(1);
  }

@@ -27,7 +27,7 @@ int main(int argc, char ** argv){
  for (int i = 0; i < argc-1; ++i) {
  args[i] = argv[i+1];
  if (args[i] == "-h" || args[i] == "--help") {
- print_usage();
+ print_usage(argv[0]);
  exit(0);
  }
  }
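
Note: the lookup-merge change threads argv[0] into the usage printer rather than hard-coding the old lookup-merge name, presumably because the binaries gain a llama- prefix in this release (see the CMakeLists hunk above, llama-lookup-merge). A small self-contained sketch of the same pattern; the body is a placeholder, not the real merge logic:

    #include <cstdio>
    #include <string>

    // The usage text takes argv[0] so it always names the binary as it was invoked.
    static void print_usage(const char * argv0) {
        std::fprintf(stderr, "Merges multiple lookup cache files into a single one.\n");
        std::fprintf(stderr, "Usage: %s [--help] part_1.bin part_2.bin ... merged.bin\n", argv0);
    }

    int main(int argc, char ** argv) {
        if (argc < 3) {
            print_usage(argv[0]);
            return 1;
        }
        for (int i = 1; i < argc; ++i) {
            if (std::string(argv[i]) == "-h" || std::string(argv[i]) == "--help") {
                print_usage(argv[0]);
                return 0;
            }
        }
        // ... merging of the cache files would happen here ...
        return 0;
    }
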
package/src/llama.cpp/examples/lookup/lookup-stats.cpp
@@ -16,6 +16,7 @@ int main(int argc, char ** argv){
  gpt_params params;

  if (!gpt_params_parse(argc, argv, params)) {
+ gpt_params_print_usage(argc, argv, params);
  return 1;
  }

@@ -30,7 +31,6 @@ int main(int argc, char ** argv){

  // load the model
  std::tie(model, ctx) = llama_init_from_gpt_params(params);
- GGML_ASSERT(llama_n_vocab(model) < (1 << 16));

  // tokenize the prompt
  std::vector<llama_token> inp;
@@ -64,7 +64,7 @@ int main(int argc, char ** argv){
  }

  const int n_input = inp.size();
- const int n_ctx = params.n_ctx;
+ const int n_ctx = llama_n_ctx(ctx);

  int n_drafted = 0;
  int n_accept = 0;
package/src/llama.cpp/examples/lookup/lookup.cpp
@@ -15,6 +15,7 @@ int main(int argc, char ** argv){
  gpt_params params;

  if (!gpt_params_parse(argc, argv, params)) {
+ gpt_params_print_usage(argc, argv, params);
  return 1;
  }

@@ -38,7 +39,6 @@ int main(int argc, char ** argv){

  // load the model
  std::tie(model, ctx) = llama_init_from_gpt_params(params);
- GGML_ASSERT(llama_n_vocab(model) < (1 << 16));

  // tokenize the prompt
  std::vector<llama_token> inp;
package/src/llama.cpp/examples/main/CMakeLists.txt
@@ -1,4 +1,4 @@
- set(TARGET main)
+ set(TARGET llama-cli)
  add_executable(${TARGET} main.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})