@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281) hide show
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
@@ -1,354 +0,0 @@
1
- #include "arg.h"
2
- #include "log.h"
3
- #include "common.h"
4
- #include "sampling.h"
5
- #include "clip.h"
6
- #include "llava.h"
7
- #include "llama.h"
8
- #include "ggml.h"
9
-
10
- #include <algorithm>
11
- #include <cstdio>
12
- #include <cstdlib>
13
- #include <cstring>
14
- #include <vector>
15
- #include <iostream> // TODO: remove me
16
-
17
// Handles used by one MiniCPM-V session in this example.
// Every member defaults to null.
struct llava_context {
    struct clip_ctx      * ctx_clip  = nullptr; // set by minicpmv_init() from clip_init_context()
    struct llama_context * ctx_llama = nullptr; // set by llava_init_context()
    struct llama_model   * model     = nullptr; // set by llava_init_context()
};
22
-
23
- static void show_additional_info(int /*argc*/, char ** argv) {
24
- LOG("\nexample usage:\n\n%s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
25
- LOG("\nnote: a lower temperature value like 0.1 is recommended for better quality.\n");
26
- }
27
-
28
- static struct llama_model * llava_init(common_params * params) {
29
- llama_backend_init();
30
- llama_numa_init(params->numa);
31
-
32
- llama_model_params model_params = common_model_params_to_llama(*params);
33
-
34
- llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
35
- if (model == NULL) {
36
- LOG_ERR("%s: unable to load model\n" , __func__);
37
- return NULL;
38
- }
39
- return model;
40
- }
41
-
42
- static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
43
- auto prompt = params->prompt;
44
- if (prompt.empty()) {
45
- prompt = "describe the image in detail.";
46
- }
47
-
48
- llama_context_params ctx_params = common_context_params_to_llama(*params);
49
- if (params->n_ctx < 2048) {
50
- // warn user here, "Image processing requires at least 2048 context, setting context to 2048"
51
- LOG_WRN("%s: Image processing requires at least 2048 context, setting context to 2048\n" , __func__);
52
- ctx_params.n_ctx = 2048;
53
- } else {
54
- ctx_params.n_ctx = params->n_ctx;
55
- }
56
-
57
- llama_context * ctx_llama = llama_init_from_model(model, ctx_params);
58
-
59
- if (ctx_llama == NULL) {
60
- LOG_ERR("%s: failed to create the llama_context\n" , __func__);
61
- return NULL;
62
- }
63
-
64
- auto * ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));
65
-
66
- ctx_llava->ctx_llama = ctx_llama;
67
- ctx_llava->model = model;
68
- return ctx_llava;
69
- }
70
-
71
- static void llava_free(struct llava_context * ctx_llava) {
72
- if (ctx_llava->ctx_clip) {
73
- clip_free(ctx_llava->ctx_clip);
74
- ctx_llava->ctx_clip = NULL;
75
- }
76
-
77
- llama_free(ctx_llava->ctx_llama);
78
- llama_model_free(ctx_llava->model);
79
- llama_backend_free();
80
- }
81
-
82
- static struct clip_ctx * clip_init_context(common_params * params) {
83
- const char * clip_path = params->mmproj.c_str();
84
-
85
- auto prompt = params->prompt;
86
- if (prompt.empty()) {
87
- prompt = "describe the image in detail.";
88
- }
89
- struct clip_context_params clip_params = {
90
- /* use_gpu */ params->n_gpu_layers != 0,
91
- /* verbosity */ params->verbosity,
92
- };
93
- auto * ctx_clip = clip_init(clip_path, clip_params);
94
- return ctx_clip;
95
- }
96
-
97
- static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_token> tokens, int n_batch, int * n_past) {
98
- int N = (int) tokens.size();
99
- for (int i = 0; i < N; i += n_batch) {
100
- int n_eval = (int) tokens.size() - i;
101
- if (n_eval > n_batch) {
102
- n_eval = n_batch;
103
- }
104
- if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
105
- LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
106
- return false;
107
- }
108
- *n_past += n_eval;
109
- }
110
- return true;
111
- }
112
-
113
- static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
114
- std::vector<llama_token> tokens;
115
- tokens.push_back(id);
116
- return eval_tokens(ctx_llama, tokens, 1, n_past);
117
- }
118
-
119
- static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
120
- std::string str2 = str;
121
- std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
122
- return eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
123
- }
124
-
125
- static void process_eval_image_embed(struct llava_context * ctx_llava, const struct llava_image_embed * embeds, int n_batch, int * n_past, int idx) {
126
- float * image_embed = (float *)malloc(clip_embd_nbytes(ctx_llava->ctx_clip));
127
- std::memcpy(image_embed, embeds->embed + idx * clip_n_patches(ctx_llava->ctx_clip) * clip_n_mmproj_embd(ctx_llava->ctx_clip), clip_embd_nbytes(ctx_llava->ctx_clip));
128
-
129
- auto * slice_embed = (llava_image_embed*)malloc(sizeof(llava_image_embed));
130
- slice_embed->embed = image_embed;
131
- slice_embed->n_image_pos = clip_n_patches(ctx_llava->ctx_clip);
132
- llava_eval_image_embed(ctx_llava->ctx_llama, slice_embed, n_batch, n_past);
133
- llava_image_embed_free(slice_embed);
134
- }
135
-
136
- static void process_image(struct llava_context * ctx_llava, struct llava_image_embed * embeds, common_params * params, int &n_past) {
137
- std::string system_prompt;
138
- int idx = 0;
139
- int num_image_embeds = embeds->n_image_pos / clip_n_patches(ctx_llava->ctx_clip);
140
- int has_minicpmv_projector = clip_is_minicpmv(ctx_llava->ctx_clip);
141
- if (has_minicpmv_projector == 2) {
142
- system_prompt = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n";
143
- }
144
- else if (has_minicpmv_projector == 3) {
145
- system_prompt = "<|im_start|>user\n";
146
- }
147
- else if (has_minicpmv_projector == 4) {
148
- system_prompt = "<|im_start|>user\n";
149
- }
150
- LOG_INF("%s: image token past: %d\n", __func__, n_past);
151
- eval_string(ctx_llava->ctx_llama, (system_prompt+"<image>").c_str(), params->n_batch, &n_past, false);
152
- process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
153
- eval_string(ctx_llava->ctx_llama, std::string("</image>").c_str(), params->n_batch, &n_past, false);
154
- if (num_image_embeds > 1) {
155
- if (has_minicpmv_projector == 2) {
156
- size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip);
157
- eval_string(ctx_llava->ctx_llama, std::string("<slice>").c_str(), params->n_batch, &n_past, false);
158
- for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) {
159
- for (size_t j = 0; j < num_image_embeds_col; ++j) {
160
- eval_string(ctx_llava->ctx_llama, std::string("<image>").c_str(), params->n_batch, &n_past, false);
161
- process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
162
- eval_string(ctx_llava->ctx_llama, std::string("</image>").c_str(), params->n_batch, &n_past, false);
163
- if (j == num_image_embeds_col - 1) {
164
- eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false);
165
- }
166
- }
167
- }
168
- eval_string(ctx_llava->ctx_llama, std::string("</slice>").c_str(), params->n_batch, &n_past, false);
169
- }
170
- else if (has_minicpmv_projector == 3 || has_minicpmv_projector == 4) {
171
- size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip);
172
- for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) {
173
- for (size_t j = 0; j < num_image_embeds_col; ++j) {
174
- eval_string(ctx_llava->ctx_llama, std::string("<slice>").c_str(), params->n_batch, &n_past, false);
175
- process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
176
- eval_string(ctx_llava->ctx_llama, std::string("</slice>").c_str(), params->n_batch, &n_past, false);
177
- if (j == num_image_embeds_col - 1) {
178
- eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false);
179
- }
180
- }
181
- }
182
- }
183
- }
184
- LOG_INF("%s: image token past: %d\n", __func__, n_past);
185
- }
186
-
187
- static const char * sample(struct common_sampler * smpl,
188
- struct llama_context * ctx_llama,
189
- int * n_past) {
190
- const llama_token id = common_sampler_sample(smpl, ctx_llama, -1);
191
- common_sampler_accept(smpl, id, true);
192
-
193
- const llama_model * model = llama_get_model(ctx_llama);
194
- const llama_vocab * vocab = llama_model_get_vocab(model);
195
-
196
- static std::string ret;
197
- if (llama_vocab_is_eog(vocab, id)) {
198
- ret = "</s>";
199
- } else {
200
- ret = common_token_to_piece(ctx_llama, id);
201
- }
202
- eval_id(ctx_llama, id, n_past);
203
- return ret.c_str();
204
- }
205
-
206
- static struct llava_context * minicpmv_init(common_params * params, const std::string & fname, int &n_past){
207
- auto * ctx_clip = clip_init_context(params);
208
- auto * embeds = llava_image_embed_make_with_filename(ctx_clip, params->cpuparams.n_threads, fname.c_str());
209
- if (!embeds) {
210
- LOG_ERR("failed to load image %s. Terminating\n\n", fname.c_str());
211
- return NULL;
212
- }
213
-
214
- // process the prompt
215
- if (params->prompt.empty() && params->interactive == false) {
216
- LOG_ERR("prompt should be given or interactive mode should be on");
217
- return NULL;
218
- }
219
-
220
- auto * model = llava_init(params);
221
- if (model == NULL) {
222
- fprintf(stderr, "%s: error: failed to init minicpmv model\n", __func__);
223
- return NULL;
224
- }
225
- const int64_t t_llava_init_start_us = ggml_time_us();
226
- auto * ctx_llava = llava_init_context(params, model);
227
- ctx_llava->ctx_clip = ctx_clip;
228
- const int64_t t_llava_init_end_us = ggml_time_us();
229
- float t_llava_init_ms = (t_llava_init_end_us - t_llava_init_start_us) / 1000.0;
230
- LOG_INF("%s: llava init in %8.2f ms.\n", __func__, t_llava_init_ms);
231
-
232
- const int64_t t_process_image_start_us = ggml_time_us();
233
- process_image(ctx_llava, embeds, params, n_past);
234
- const int64_t t_process_image_end_us = ggml_time_us();
235
- float t_process_image_ms = (t_process_image_end_us - t_process_image_start_us) / 1000.0;
236
- LOG_INF("%s: llama process image in %8.2f ms.\n", __func__, t_process_image_ms);
237
-
238
- llava_image_embed_free(embeds);
239
- return ctx_llava;
240
- }
241
-
242
- static struct common_sampler * llama_init(struct llava_context * ctx_llava, common_params * params, const std::string & prompt, int & n_past, bool is_first = false){
243
- std::string user_prompt = prompt;
244
- int has_minicpmv_projector = clip_is_minicpmv(ctx_llava->ctx_clip);
245
- if (!is_first) {
246
- if (has_minicpmv_projector == 2) {
247
- user_prompt = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n" + prompt;
248
- }
249
- else if (has_minicpmv_projector == 3) {
250
- user_prompt = "<|im_start|>user\n" + prompt;
251
- }
252
- else if (has_minicpmv_projector == 4) {
253
- user_prompt = "<|im_start|>user\n" + prompt;
254
- }
255
- }
256
-
257
- eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false);
258
- if (has_minicpmv_projector == 2) {
259
- eval_string(ctx_llava->ctx_llama, "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", params->n_batch, &n_past, false);
260
- }
261
- else if (has_minicpmv_projector == 3) {
262
- eval_string(ctx_llava->ctx_llama, "<|im_end|><|im_start|>assistant\n", params->n_batch, &n_past, false);
263
- }
264
- else if (has_minicpmv_projector == 4) {
265
- eval_string(ctx_llava->ctx_llama, "<|im_end|><|im_start|>assistant\n", params->n_batch, &n_past, false);
266
- }
267
-
268
- // generate the response
269
-
270
- LOG_INF("\n");
271
-
272
- struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sampling);
273
- return smpl;
274
- }
275
-
276
- static const char * llama_loop(struct llava_context * ctx_llava,struct common_sampler * smpl, int &n_past){
277
-
278
- const char * tmp = sample(smpl, ctx_llava->ctx_llama, &n_past);
279
- return tmp;
280
- }
281
-
282
- int main(int argc, char ** argv) {
283
- ggml_time_init();
284
-
285
- common_params params;
286
-
287
- if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) {
288
- return 1;
289
- }
290
-
291
- common_init();
292
-
293
- if (params.mmproj.empty() || (params.image.empty())) {
294
- show_additional_info(argc, argv);
295
- return 1;
296
- }
297
-
298
- for (auto & image : params.image) {
299
- int n_past = 0;
300
- auto * ctx_llava = minicpmv_init(&params, image, n_past);
301
-
302
- if (!params.prompt.empty()) {
303
- LOG("<user>%s\n", params.prompt.c_str());
304
- LOG("<assistant>");
305
- auto * smpl = llama_init(ctx_llava, &params, params.prompt, n_past, true);
306
- const int max_tgt_len = params.n_predict < 0 ? 256 : params.n_predict;
307
- std::string response;
308
- bool have_tmp = false;
309
- for (int i = 0; i < max_tgt_len; i++) {
310
- const auto * tmp = llama_loop(ctx_llava, smpl, n_past);
311
- response += tmp;
312
- if (strcmp(tmp, "</s>") == 0){
313
- if (!have_tmp) {
314
- continue;
315
- }
316
- break;
317
- }
318
- if (strstr(tmp, "###")) break; // Yi-VL behavior
319
- have_tmp = true;
320
- printf("%s", tmp);
321
- if (strstr(response.c_str(), "<user>")) break; // minicpm-v
322
-
323
- fflush(stdout);
324
- }
325
- common_sampler_free(smpl);
326
- }else {
327
- while (true) {
328
- LOG("<user>");
329
- std::string prompt;
330
- std::getline(std::cin, prompt);
331
- LOG("<assistant>");
332
- auto * smpl = llama_init(ctx_llava, &params, prompt, n_past, true);
333
- const int max_tgt_len = params.n_predict < 0 ? 256 : params.n_predict;
334
- std::string response;
335
- for (int i = 0; i < max_tgt_len; i++) {
336
- const auto * tmp = llama_loop(ctx_llava, smpl, n_past);
337
- response += tmp;
338
- if (strcmp(tmp, "</s>") == 0) break;
339
- printf("%s", tmp);// mistral llava-1.6
340
- if (strstr(response.c_str(), "<user>")) break; // minicpm-v
341
- fflush(stdout);
342
- }
343
- common_sampler_free(smpl);
344
- }
345
- }
346
- printf("\n");
347
- llama_perf_context_print(ctx_llava->ctx_llama);
348
-
349
- ctx_llava->model = NULL;
350
- llava_free(ctx_llava);
351
- }
352
-
353
- return 0;
354
- }