@fugood/llama.node 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. package/CMakeLists.txt +1 -8
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +4 -2
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +10 -10
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +14 -17
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +5 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +137 -29
  25. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  26. package/src/llama.cpp/.github/workflows/docker.yml +46 -34
  27. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  28. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  29. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  30. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  31. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  32. package/src/llama.cpp/CMakeLists.txt +26 -11
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/common/CMakeLists.txt +10 -10
  35. package/src/llama.cpp/common/arg.cpp +2041 -0
  36. package/src/llama.cpp/common/arg.h +77 -0
  37. package/src/llama.cpp/common/common.cpp +523 -1861
  38. package/src/llama.cpp/common/common.h +234 -106
  39. package/src/llama.cpp/common/console.cpp +3 -0
  40. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  41. package/src/llama.cpp/common/log.cpp +401 -0
  42. package/src/llama.cpp/common/log.h +66 -698
  43. package/src/llama.cpp/common/ngram-cache.cpp +39 -36
  44. package/src/llama.cpp/common/ngram-cache.h +19 -19
  45. package/src/llama.cpp/common/sampling.cpp +356 -350
  46. package/src/llama.cpp/common/sampling.h +62 -139
  47. package/src/llama.cpp/common/stb_image.h +5990 -6398
  48. package/src/llama.cpp/docs/build.md +72 -17
  49. package/src/llama.cpp/examples/CMakeLists.txt +1 -2
  50. package/src/llama.cpp/examples/batched/batched.cpp +49 -65
  51. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  53. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
  54. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
  56. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
  58. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  59. package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  61. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  62. package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
  63. package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
  64. package/src/llama.cpp/examples/infill/infill.cpp +131 -192
  65. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
  66. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  67. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
  68. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  69. package/src/llama.cpp/examples/llava/clip.cpp +686 -150
  70. package/src/llama.cpp/examples/llava/clip.h +11 -2
  71. package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
  72. package/src/llama.cpp/examples/llava/llava.cpp +146 -26
  73. package/src/llama.cpp/examples/llava/llava.h +2 -3
  74. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  75. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  76. package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
  77. package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
  78. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  79. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
  80. package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
  81. package/src/llama.cpp/examples/main/main.cpp +216 -313
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
  83. package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
  84. package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  87. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
  88. package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
  89. package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
  90. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
  91. package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
  92. package/src/llama.cpp/examples/server/server.cpp +1347 -1531
  93. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  94. package/src/llama.cpp/examples/server/utils.hpp +396 -107
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  96. package/src/llama.cpp/examples/simple/simple.cpp +132 -106
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  99. package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
  100. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  101. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  102. package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
  103. package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
  104. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  105. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  106. package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
  107. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  108. package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
  109. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  110. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  111. package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
  112. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  113. package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
  114. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  115. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  116. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  117. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  118. package/src/llama.cpp/ggml/include/ggml.h +272 -505
  119. package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
  120. package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
  121. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  122. package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
  123. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  124. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  125. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  126. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  127. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  128. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
  129. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  130. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
  131. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  132. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
  133. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  134. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  135. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  136. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  137. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  138. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
  139. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  140. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  141. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  142. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  143. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  151. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
  152. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  153. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  155. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  156. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  157. package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
  158. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  159. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
  160. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  161. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  162. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  163. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  164. package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
  165. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  167. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  169. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
  172. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  173. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  174. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  175. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  176. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  177. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  178. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  179. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
  180. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  181. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  182. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  183. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
  184. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
  187. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
  188. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  192. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  195. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  197. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  198. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  199. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  200. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
  201. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
  202. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
  203. package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
  204. package/src/llama.cpp/include/llama.h +296 -285
  205. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  206. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  207. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  208. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  209. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  210. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  211. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  212. package/src/llama.cpp/src/llama-grammar.h +120 -15
  213. package/src/llama.cpp/src/llama-impl.h +156 -1
  214. package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
  215. package/src/llama.cpp/src/llama-sampling.h +39 -47
  216. package/src/llama.cpp/src/llama-vocab.cpp +390 -127
  217. package/src/llama.cpp/src/llama-vocab.h +60 -20
  218. package/src/llama.cpp/src/llama.cpp +6215 -3263
  219. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  220. package/src/llama.cpp/src/unicode-data.h +4 -4
  221. package/src/llama.cpp/src/unicode.cpp +15 -7
  222. package/src/llama.cpp/tests/CMakeLists.txt +4 -2
  223. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  224. package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
  225. package/src/llama.cpp/tests/test-barrier.cpp +94 -0
  226. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  227. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  228. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  229. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
  230. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  231. package/src/llama.cpp/tests/test-log.cpp +39 -0
  232. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  233. package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
  234. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  235. package/src/llama.cpp/tests/test-rope.cpp +2 -1
  236. package/src/llama.cpp/tests/test-sampling.cpp +226 -142
  237. package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
  238. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  239. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  240. package/patches/llama.patch +0 -22
  241. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  242. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  243. package/src/llama.cpp/common/grammar-parser.h +0 -29
  244. package/src/llama.cpp/common/train.cpp +0 -1513
  245. package/src/llama.cpp/common/train.h +0 -233
  246. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
  247. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  248. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
  249. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
  250. package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
  251. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  252. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
package/src/llama.cpp/examples/llava/clip.h

@@ -18,14 +18,17 @@
 # define CLIP_API
 #endif
 
-struct clip_ctx;
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 struct clip_ctx;
 
+struct clip_image_size {
+    int width;
+    int height;
+};
+
 struct clip_image_u8_batch {
     struct clip_image_u8 * data;
     size_t size;
@@ -55,6 +58,10 @@ CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx);
 CLIP_API int clip_n_patches (const struct clip_ctx * ctx);
 CLIP_API int clip_n_mmproj_embd(const struct clip_ctx * ctx);
 
+CLIP_API int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip);
+CLIP_API void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size);
+
+CLIP_API struct clip_image_size * clip_image_size_init();
 CLIP_API struct clip_image_u8 * clip_image_u8_init ();
 CLIP_API struct clip_image_f32 * clip_image_f32_init();
 
@@ -78,6 +85,8 @@ CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, cons
 
 CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out, int itype);
 
+CLIP_API int clip_is_minicpmv(const struct clip_ctx * ctx);
+
 #ifdef __cplusplus
 }
 #endif
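
The new declarations above back the MiniCPM-V support added in this release (see also examples/llava/minicpmv-cli.cpp in the file list). As a hedged illustration only, a caller that wants the projector to know the source image dimensions before encoding might use them roughly as below; the mmproj path and the literal dimensions are placeholders, and clip_model_load follows the existing llava example.

    // Illustrative sketch (not from the package): register the source image
    // size with the CLIP context when a MiniCPM-V projector is loaded.
    struct clip_ctx * ctx_clip = clip_model_load("mmproj-model-f16.gguf", /*verbosity=*/ 1);

    if (clip_is_minicpmv(ctx_clip)) {                       // presumably non-zero for MiniCPM-V projectors
        struct clip_image_size * size = clip_image_size_init();
        size->width  = 448;                                 // placeholder dimensions
        size->height = 448;
        clip_add_load_image_size(ctx_clip, size);

        int n_embeds_col = clip_uhd_num_image_embeds_col(ctx_clip);
        (void) n_embeds_col;                                // used when slicing high-resolution images
    }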
package/src/llama.cpp/examples/llava/llava-cli.cpp

@@ -1,14 +1,16 @@
-#include "ggml.h"
+#include "arg.h"
+#include "base64.hpp"
 #include "log.h"
 #include "common.h"
+#include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"
-
-#include "base64.hpp"
+#include "ggml.h"
 
 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 #include <vector>
 
 static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_token> tokens, int n_batch, int * n_past) {
@@ -18,8 +20,8 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
         if (n_eval > n_batch) {
             n_eval = n_batch;
         }
-        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval, *n_past, 0))) {
-            LOG_TEE("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
+        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
+            LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
             return false;
         }
         *n_past += n_eval;
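
The eval_tokens change above follows the upstream llama_batch_get_one() signature, which dropped the explicit position and sequence-id arguments between these two package versions; positions are now tracked by the context. A minimal sketch of the resulting chunked-decode pattern, with an illustrative helper name (eval_tokens_sketch) rather than the example's own function:

    #include <algorithm>
    #include <vector>
    #include "llama.h"

    // Sketch: decode a token vector in n_batch-sized chunks with the
    // two-argument llama_batch_get_one(); n_past is kept only for the
    // caller's own bookkeeping.
    static bool eval_tokens_sketch(llama_context * ctx, std::vector<llama_token> & tokens,
                                   int n_batch, int * n_past) {
        for (size_t i = 0; i < tokens.size(); i += (size_t) n_batch) {
            const int n_eval = std::min((int) (tokens.size() - i), n_batch);
            if (llama_decode(ctx, llama_batch_get_one(&tokens[i], n_eval))) {
                return false; // decode failed
            }
            *n_past += n_eval;
        }
        return true;
    }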
@@ -35,21 +37,21 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
 
 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
     std::string str2 = str;
-    std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true);
+    std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
     eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
     return true;
 }
 
-static const char * sample(struct llama_sampling_context * ctx_sampling,
+static const char * sample(struct common_sampler * smpl,
                            struct llama_context * ctx_llama,
                            int * n_past) {
-    const llama_token id = llama_sampling_sample(ctx_sampling, ctx_llama, NULL);
-    llama_sampling_accept(ctx_sampling, ctx_llama, id, true);
+    const llama_token id = common_sampler_sample(smpl, ctx_llama, -1);
+    common_sampler_accept(smpl, id, true);
     static std::string ret;
     if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
         ret = "</s>";
     } else {
-        ret = llama_token_to_piece(ctx_llama, id);
+        ret = common_token_to_piece(ctx_llama, id);
     }
     eval_id(ctx_llama, id, n_past);
     return ret.c_str();
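
The sample() rewrite above tracks the common/sampling.h refactor in this release: llama_sampling_context and the llama_sampling_* calls are replaced by a common_sampler created from the model plus the parsed sampling params. A minimal sketch of the lifecycle, using an illustrative helper name (sample_one); the common_sampler_* calls themselves appear verbatim in the diff:

    #include <string>
    #include "common.h"
    #include "sampling.h"

    // Sketch: create a sampler from the model + sampling params, draw and
    // accept one token, convert it to text, then free the sampler.
    static std::string sample_one(llama_model * model, llama_context * ctx,
                                  const common_params & params) {
        common_sampler * smpl = common_sampler_init(model, params.sparams);
        const llama_token id  = common_sampler_sample(smpl, ctx, /*idx=*/ -1);
        common_sampler_accept(smpl, id, /*accept_grammar=*/ true);
        std::string piece = common_token_to_piece(ctx, id);
        common_sampler_free(smpl);
        return piece;
    }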
@@ -74,7 +76,7 @@ static llava_image_embed * llava_image_embed_make_with_prompt_base64(struct clip
     size_t img_base64_str_start, img_base64_str_end;
     find_image_tag_in_prompt(prompt, img_base64_str_start, img_base64_str_end);
     if (img_base64_str_start == std::string::npos || img_base64_str_end == std::string::npos) {
-        LOG_TEE("%s: invalid base64 image tag. must be %s<base64 byte string>%s\n", __func__, IMG_BASE64_TAG_BEGIN, IMG_BASE64_TAG_END);
+        LOG_ERR("%s: invalid base64 image tag. must be %s<base64 byte string>%s\n", __func__, IMG_BASE64_TAG_BEGIN, IMG_BASE64_TAG_END);
         return NULL;
     }
 
@@ -88,7 +90,7 @@ static llava_image_embed * llava_image_embed_make_with_prompt_base64(struct clip
 
     auto embed = llava_image_embed_make_with_bytes(ctx_clip, n_threads, img_bytes.data(), img_bytes.size());
     if (!embed) {
-        LOG_TEE("%s: could not load image from base64 string.\n", __func__);
+        LOG_ERR("%s: could not load image from base64 string.\n", __func__);
         return NULL;
     }
 
@@ -112,31 +114,29 @@ struct llava_context {
     struct llama_model * model = NULL;
 };
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
-    LOG_TEE("\n example usage:\n");
-    LOG_TEE("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
-    LOG_TEE("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
+static void print_usage(int, char ** argv) {
+    LOG("\n example usage:\n");
+    LOG("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
+    LOG("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
 }
 
-static struct llava_image_embed * load_image(llava_context * ctx_llava, gpt_params * params, const std::string & fname) {
+static struct llava_image_embed * load_image(llava_context * ctx_llava, common_params * params, const std::string & fname) {
 
     // load and preprocess the image
     llava_image_embed * embed = NULL;
     auto prompt = params->prompt;
     if (prompt_contains_image(prompt)) {
         if (!params->image.empty()) {
-            LOG_TEE("using base64 encoded image instead of command line image path\n");
+            LOG_INF("using base64 encoded image instead of command line image path\n");
         }
-        embed = llava_image_embed_make_with_prompt_base64(ctx_llava->ctx_clip, params->n_threads, prompt);
+        embed = llava_image_embed_make_with_prompt_base64(ctx_llava->ctx_clip, params->cpuparams.n_threads, prompt);
         if (!embed) {
-            LOG_TEE("%s: can't load image from prompt\n", __func__);
+            LOG_ERR("%s: can't load image from prompt\n", __func__);
            return NULL;
         }
         params->prompt = remove_image_from_prompt(prompt);
     } else {
-        embed = llava_image_embed_make_with_filename(ctx_llava->ctx_clip, params->n_threads, fname.c_str());
+        embed = llava_image_embed_make_with_filename(ctx_llava->ctx_clip, params->cpuparams.n_threads, fname.c_str());
         if (!embed) {
             fprintf(stderr, "%s: is %s really an image file?\n", __func__, fname.c_str());
             return NULL;
@@ -146,7 +146,7 @@ static struct llava_image_embed * load_image(llava_context * ctx_llava, gpt_para
     return embed;
 }
 
-static void process_prompt(struct llava_context * ctx_llava, struct llava_image_embed * image_embed, gpt_params * params, const std::string & prompt) {
+static void process_prompt(struct llava_context * ctx_llava, struct llava_image_embed * image_embed, common_params * params, const std::string & prompt) {
     int n_past = 0;
 
     const int max_tgt_len = params->n_predict < 0 ? 256 : params->n_predict;
@@ -157,18 +157,18 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         // new templating mode: Provide the full prompt including system message and use <image> as a placeholder for the image
         system_prompt = prompt.substr(0, image_pos);
         user_prompt = prompt.substr(image_pos + std::string("<image>").length());
-        LOG_TEE("system_prompt: %s\n", system_prompt.c_str());
+        LOG_INF("system_prompt: %s\n", system_prompt.c_str());
         if (params->verbose_prompt) {
-            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
-        LOG_TEE("user_prompt: %s\n", user_prompt.c_str());
+        LOG_INF("user_prompt: %s\n", user_prompt.c_str());
         if (params->verbose_prompt) {
-            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
     } else {
@@ -176,9 +176,9 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
         user_prompt = prompt + "\nASSISTANT:";
         if (params->verbose_prompt) {
-            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
     }
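
The prompt handling above swaps the global ::llama_tokenize and llama_token_to_piece helpers for common_tokenize and common_token_to_piece, and LOG_TEE for the leveled LOG_INF. As a hedged restatement of that pattern, an illustrative helper (dump_prompt_tokens) mirroring the verbose_prompt loop:

    #include <string>
    #include <vector>
    #include "common.h"
    #include "log.h"

    // Sketch: tokenize a prompt with the renamed common_* helpers and log
    // each token id together with its decoded text piece.
    static void dump_prompt_tokens(llama_context * ctx, const std::string & prompt) {
        std::vector<llama_token> tmp = common_tokenize(ctx, prompt, /*add_special=*/ true, /*parse_special=*/ true);
        for (int i = 0; i < (int) tmp.size(); i++) {
            LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx, tmp[i]).c_str());
        }
    }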
@@ -189,21 +189,21 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
 
     // generate the response
 
-    LOG_TEE("\n");
+    LOG("\n");
 
-    struct llama_sampling_context * ctx_sampling = llama_sampling_init(params->sparams);
-    if (!ctx_sampling) {
-        fprintf(stderr, "%s: failed to initialize sampling subsystem\n", __func__);
+    struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sparams);
+    if (!smpl) {
+        LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
         exit(1);
     }
 
     std::string response = "";
     for (int i = 0; i < max_tgt_len; i++) {
-        const char * tmp = sample(ctx_sampling, ctx_llava->ctx_llama, &n_past);
+        const char * tmp = sample(smpl, ctx_llava->ctx_llama, &n_past);
         response += tmp;
         if (strcmp(tmp, "</s>") == 0) break;
         if (strstr(tmp, "###")) break; // Yi-VL behavior
-        printf("%s", tmp);
+        LOG("%s", tmp);
         if (strstr(response.c_str(), "<|im_end|>")) break; // Yi-34B llava-1.6 - for some reason those decode not as the correct token (tokenizer works)
         if (strstr(response.c_str(), "<|im_start|>")) break; // Yi-34B llava-1.6
         if (strstr(response.c_str(), "USER:")) break; // mistral llava-1.6
@@ -211,25 +211,25 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         fflush(stdout);
     }
 
-    llama_sampling_free(ctx_sampling);
-    printf("\n");
+    common_sampler_free(smpl);
+    LOG("\n");
 }
 
-static struct llama_model * llava_init(gpt_params * params) {
+static struct llama_model * llava_init(common_params * params) {
     llama_backend_init();
     llama_numa_init(params->numa);
 
-    llama_model_params model_params = llama_model_params_from_gpt_params(*params);
+    llama_model_params model_params = common_model_params_to_llama(*params);
 
     llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
-        LOG_TEE("%s: error: unable to load model\n" , __func__);
+        LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
     }
     return model;
 }
 
-static struct llava_context * llava_init_context(gpt_params * params, llama_model * model) {
+static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
     const char * clip_path = params->mmproj.c_str();
 
     auto prompt = params->prompt;
@@ -240,17 +240,17 @@ static struct llava_context * llava_init_context(gpt_params * params, llama_mode
     auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
 
 
-    llama_context_params ctx_params = llama_context_params_from_gpt_params(*params);
+    llama_context_params ctx_params = common_context_params_to_llama(*params);
     ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
 
     llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);
 
     if (ctx_llama == NULL) {
-        LOG_TEE("%s: error: failed to create the llama_context\n" , __func__);
+        LOG_ERR("%s: failed to create the llama_context\n" , __func__);
         return NULL;
     }
 
-    auto ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));
+    auto * ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));
 
     ctx_llava->ctx_llama = ctx_llama;
     ctx_llava->ctx_clip = ctx_clip;
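
llava_init_context above now builds its llama_context_params through common_context_params_to_llama (previously llama_context_params_from_gpt_params). A small sketch of that conversion plus the 2048-token context floor the example enforces for image embeddings; the helper name (make_ctx) is illustrative:

    #include "common.h"
    #include "llama.h"

    // Sketch: derive llama_context_params from the parsed common_params and
    // enforce the minimum context size used for image embeddings.
    static llama_context * make_ctx(llama_model * model, const common_params & params) {
        llama_context_params ctx_params = common_context_params_to_llama(params);
        ctx_params.n_ctx = params.n_ctx < 2048 ? 2048 : params.n_ctx;
        return llama_new_context_with_model(model, ctx_params);
    }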
@@ -269,65 +269,54 @@ static void llava_free(struct llava_context * ctx_llava) {
     llama_backend_free();
 }
 
-static void llama_log_callback_logTee(ggml_log_level level, const char * text, void * user_data) {
-    (void) level;
-    (void) user_data;
-    LOG_TEE("%s", text);
-}
-
 int main(int argc, char ** argv) {
     ggml_time_init();
 
-    gpt_params params;
+    common_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, print_usage)) {
         return 1;
     }
 
-#ifndef LOG_DISABLE_LOGS
-    log_set_target(log_filename_generator("llava", "log"));
-    LOG_TEE("Log start\n");
-    log_dump_cmdline(argc, argv);
-    llama_log_set(llama_log_callback_logTee, nullptr);
-#endif // LOG_DISABLE_LOGS
+    common_init();
 
     if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
-        print_usage(argc, argv, {});
+        print_usage(argc, argv);
         return 1;
     }
-    auto model = llava_init(&params);
+
+    auto * model = llava_init(&params);
     if (model == NULL) {
         fprintf(stderr, "%s: error: failed to init llava model\n", __func__);
         return 1;
     }
 
     if (prompt_contains_image(params.prompt)) {
-        auto ctx_llava = llava_init_context(&params, model);
+        auto * ctx_llava = llava_init_context(&params, model);
 
-        auto image_embed = load_image(ctx_llava, &params, "");
+        auto * image_embed = load_image(ctx_llava, &params, "");
 
         // process the prompt
         process_prompt(ctx_llava, image_embed, &params, params.prompt);
 
-        llama_print_timings(ctx_llava->ctx_llama);
+        llama_perf_context_print(ctx_llava->ctx_llama);
         llava_image_embed_free(image_embed);
         ctx_llava->model = NULL;
         llava_free(ctx_llava);
     } else {
         for (auto & image : params.image) {
-            auto ctx_llava = llava_init_context(&params, model);
+            auto * ctx_llava = llava_init_context(&params, model);
 
-            auto image_embed = load_image(ctx_llava, &params, image);
+            auto * image_embed = load_image(ctx_llava, &params, image);
             if (!image_embed) {
-                std::cerr << "error: failed to load image " << image << ". Terminating\n\n";
+                LOG_ERR("%s: failed to load image %s. Terminating\n\n", __func__, image.c_str());
                 return 1;
            }
 
            // process the prompt
            process_prompt(ctx_llava, image_embed, &params, params.prompt);
 
-            llama_print_timings(ctx_llava->ctx_llama);
+            llama_perf_context_print(ctx_llava->ctx_llama);
            llava_image_embed_free(image_embed);
            ctx_llava->model = NULL;
            llava_free(ctx_llava);
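
Taken together, the main() changes replace the old gpt_params / LOG_TEE bootstrap with common_params_parse (which now takes an example id and a usage callback), common_init, and llama_perf_context_print. A condensed, hedged sketch of the new startup shape; the model/image paths in the usage string are placeholders and everything between parsing and the timing call is trimmed to a comment:

    #include "arg.h"
    #include "common.h"
    #include "log.h"
    #include "llama.h"

    static void print_usage(int, char ** argv) {
        LOG("\n example usage: %s -m model.gguf --mmproj mmproj.gguf --image image.jpg\n", argv[0]);
    }

    int main(int argc, char ** argv) {
        common_params params;
        if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, print_usage)) {
            return 1;
        }
        common_init(); // replaces the old LOG_DISABLE_LOGS / log_set_target block

        // ... model / context setup and generation as in the diff above ...

        // llama_print_timings() is gone; timings are printed per context with
        // llama_perf_context_print(ctx_llama);
        return 0;
    }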