@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281):
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
@@ -0,0 +1,310 @@
1
+ #include "mtmd.h"
2
+ #include "llama.h"
3
+
4
+ #include <algorithm>
5
+ #include <cinttypes>
6
+ #include <vector>
7
+
8
+ #define LOG_INF(...) fprintf(stdout, __VA_ARGS__)
9
+ #define LOG_ERR(...) fprintf(stderr, __VA_ARGS__)
10
+
11
+ size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks) {
12
+ size_t n_tokens = 0;
13
+ for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
14
+ auto chunk = mtmd_input_chunks_get(chunks, i);
15
+ auto chunk_type = mtmd_input_chunk_get_type(chunk);
16
+ if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
17
+ size_t n_tokens_text;
18
+ mtmd_input_chunk_get_tokens_text(chunk, &n_tokens_text);
19
+ n_tokens += n_tokens_text;
20
+ } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
21
+ auto tokens_image = mtmd_input_chunk_get_tokens_image(chunk);
22
+ n_tokens += mtmd_image_tokens_get_n_tokens(tokens_image);
23
+ } else {
24
+ GGML_ASSERT(false && "chunk type not supported");
25
+ }
26
+ }
27
+ return n_tokens;
28
+ }
29
+
30
+ llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks) {
31
+ llama_pos n_pos = 0;
32
+ for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
33
+ auto chunk = mtmd_input_chunks_get(chunks, i);
34
+ auto chunk_type = mtmd_input_chunk_get_type(chunk);
35
+ if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
36
+ size_t n_tokens_text;
37
+ mtmd_input_chunk_get_tokens_text(chunk, &n_tokens_text);
38
+ n_pos += n_tokens_text;
39
+ } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
40
+ auto tokens_image = mtmd_input_chunk_get_tokens_image(chunk);
41
+ n_pos += mtmd_image_tokens_get_n_pos(tokens_image);
42
+ } else {
43
+ GGML_ASSERT(false && "chunk type not supported");
44
+ }
45
+ }
46
+ return n_pos;
47
+ }
48
+
49
+ // helper struct to make working with embd batch easier
50
+ // note: this will be removed after llama_batch_ext refactoring
51
+ struct decode_embd_batch {
52
+ int n_pos_per_embd;
53
+ int n_mmproj_embd;
54
+ std::vector<llama_pos> pos;
55
+ std::vector<llama_pos> pos_view; // used by mrope
56
+ std::vector<int32_t> n_seq_id;
57
+ std::vector<llama_seq_id> seq_id_0;
58
+ std::vector<llama_seq_id *> seq_ids;
59
+ std::vector<int8_t> logits;
60
+ llama_batch batch;
61
+ decode_embd_batch(float * embd, int32_t n_tokens, int n_pos_per_embd, int n_mmproj_embd) : n_pos_per_embd(n_pos_per_embd), n_mmproj_embd(n_mmproj_embd) {
62
+ pos .resize(n_tokens * n_pos_per_embd);
63
+ n_seq_id.resize(n_tokens);
64
+ seq_ids .resize(n_tokens + 1);
65
+ logits .resize(n_tokens);
66
+ seq_id_0.resize(1);
67
+ seq_ids [n_tokens] = nullptr;
68
+ batch = {
69
+ /*n_tokens =*/ n_tokens,
70
+ /*tokens =*/ nullptr,
71
+ /*embd =*/ embd,
72
+ /*pos =*/ pos.data(),
73
+ /*n_seq_id =*/ n_seq_id.data(),
74
+ /*seq_id =*/ seq_ids.data(),
75
+ /*logits =*/ logits.data(),
76
+ };
77
+ }
78
+
79
+ void set_position_normal(llama_pos pos_0, llama_seq_id seq_id) {
80
+ seq_id_0[0] = seq_id;
81
+ for (int i = 0; i < batch.n_tokens; i++) {
82
+ batch.pos [i] = pos_0 + i;
83
+ batch.n_seq_id[i] = 1;
84
+ batch.seq_id [i] = seq_id_0.data();
85
+ batch.logits [i] = false;
86
+ }
87
+ }
88
+
89
+ void set_position_mrope(llama_pos pos_0, int nx, int ny, llama_seq_id seq_id) {
90
+ GGML_ASSERT(n_pos_per_embd == 4);
91
+ seq_id_0[0] = seq_id;
92
+ for (int y = 0; y < ny; y++) {
93
+ for (int x = 0; x < nx; x++) {
94
+ int i = y * nx + x;
95
+ pos[i ] = pos_0;
96
+ pos[i + batch.n_tokens ] = pos_0 + y;
97
+ pos[i + batch.n_tokens * 2] = pos_0 + x;
98
+ pos[i + batch.n_tokens * 3] = 0; // last pos dim is unused
99
+ }
100
+ }
101
+ for (int i = 0; i < batch.n_tokens; i++) {
102
+ batch.n_seq_id[i] = 1;
103
+ batch.seq_id [i] = seq_id_0.data();
104
+ batch.logits [i] = false;
105
+ }
106
+ }
107
+
108
+ llama_batch get_view(int offset, int n_tokens) {
109
+ llama_pos * pos_ptr;
110
+ pos_view.clear();
111
+ pos_view.reserve(n_tokens * n_pos_per_embd);
112
+ if (n_pos_per_embd > 1) {
113
+ // mrope
114
+ // for example, with layout of src: 1234...1234...1234...1234...
115
+ // offset 2 will give us dst: 34...34...34...34...
116
+ for (int i = 0; i < n_pos_per_embd; i++) {
117
+ // assume n_tokens is less than or equal to batch.n_tokens
118
+ // batch.n_tokens is number of **total** tokens
119
+ // n_tokens is number of viewed token
120
+ size_t src_idx = i * batch.n_tokens + offset;
121
+ pos_view.insert(pos_view.end(),
122
+ pos.data() + src_idx,
123
+ pos.data() + src_idx + n_tokens);
124
+ }
125
+ pos_ptr = pos_view.data();
126
+ } else {
127
+ // normal
128
+ pos_ptr = pos.data() + offset;
129
+ }
130
+ return {
131
+ /*n_tokens =*/ n_tokens,
132
+ /*tokens =*/ nullptr,
133
+ /*embd =*/ batch.embd + offset * n_mmproj_embd,
134
+ /*pos =*/ pos_ptr,
135
+ /*n_seq_id =*/ batch.n_seq_id + offset,
136
+ /*seq_id =*/ batch.seq_id + offset,
137
+ /*logits =*/ batch.logits + offset,
138
+ };
139
+ }
140
+ };
141
+
142
+ // Helper function for decoding an image whose embeddings have already been calculated
143
+ int32_t mtmd_helper_decode_image_chunk(
144
+ mtmd_context * ctx,
145
+ struct llama_context * lctx,
146
+ const mtmd_input_chunk * chunk,
147
+ float * encoded_embd,
148
+ llama_pos n_past,
149
+ llama_seq_id seq_id,
150
+ int32_t n_batch,
151
+ llama_pos * new_n_past) {
152
+ if (mtmd_input_chunk_get_type(chunk) != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
153
+ LOG_ERR("failed to decode image chunk: input chunk not of image type\n");
154
+ return -1;
155
+ }
156
+ const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
157
+ if (!image_tokens) {
158
+ LOG_ERR("failed to decode image chunk: image tokens are null\n");
159
+ return -1;
160
+ }
161
+
162
+ const llama_model * model = llama_get_model(lctx);
163
+ int n_mmproj_embd = llama_model_n_embd(model);
164
+ int n_pos_per_embd = mtmd_decode_use_mrope(ctx) ? 4 : 1;
165
+
166
+ int32_t n_tokens = mtmd_image_tokens_get_n_tokens(image_tokens);
167
+ int32_t i_batch = 0;
168
+ int32_t n_img_batches = GGML_PAD(n_tokens, n_batch) / n_batch;
169
+ decode_embd_batch batch_embd(encoded_embd, n_tokens, n_pos_per_embd, n_mmproj_embd);
170
+
171
+ const int nx = mtmd_image_tokens_get_nx(image_tokens);
172
+ const int ny = mtmd_image_tokens_get_ny(image_tokens);
173
+
174
+ if (mtmd_decode_use_mrope(ctx)) {
175
+ batch_embd.set_position_mrope(n_past, nx, ny, seq_id);
176
+ } else {
177
+ batch_embd.set_position_normal(n_past, seq_id);
178
+ }
179
+
180
+ if (mtmd_decode_use_non_causal(ctx)) {
181
+ llama_set_causal_attn(lctx, false);
182
+ // TODO @ngxson : need to make sure only one image is processed at a time, and n_ubatch must be enough to hold the image
183
+ }
184
+
185
+ while (i_batch < n_img_batches) { // split into batches
186
+ int pos_offset = i_batch*n_batch;
187
+ int n_tokens_batch = std::min(n_batch, n_tokens - pos_offset);
188
+ llama_batch batch_embd_view = batch_embd.get_view(pos_offset, n_tokens_batch);
189
+
190
+ LOG_INF("decoding image batch %d/%d, n_tokens_batch = %d\n", i_batch+1, n_img_batches, n_tokens_batch);
191
+
192
+ int64_t t1 = ggml_time_ms();
193
+ int32_t ret = llama_decode(lctx, batch_embd_view);
194
+ if (ret != 0) {
195
+ LOG_ERR("failed to decode image\n");
196
+ llama_set_causal_attn(lctx, true); // restore causal attn
197
+ return ret;
198
+ }
199
+
200
+ LOG_INF("image decoded (batch %d/%d) in %" PRId64 " ms\n", i_batch+1, n_img_batches, ggml_time_ms() - t1);
201
+
202
+ i_batch++;
203
+ }
204
+
205
+ n_past += mtmd_image_tokens_get_n_pos(image_tokens);
206
+ *new_n_past = n_past;
207
+
208
+ if (mtmd_decode_use_non_causal(ctx)) {
209
+ llama_set_causal_attn(lctx, true);
210
+ }
211
+ return 0;
212
+ }
213
+
214
+ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
215
+ struct llama_context * lctx,
216
+ const mtmd_input_chunk * chunk,
217
+ llama_pos n_past,
218
+ llama_seq_id seq_id,
219
+ int32_t n_batch,
220
+ bool logits_last,
221
+ llama_pos * new_n_past) {
222
+ int32_t ret;
223
+ llama_batch text_batch = llama_batch_init(n_batch, 0, 1);
224
+ auto chunk_type = mtmd_input_chunk_get_type(chunk);
225
+
226
+ if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
227
+ size_t n_tokens;
228
+ const auto tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
229
+ // LOG_INF("decoding text chunk, n_tokens = %zu\n", n_tokens);
230
+ size_t i = 0;
231
+ while (i < n_tokens) { // split into batches
232
+ text_batch.n_tokens = 0; // clear the batch
233
+ for (; i < n_tokens && text_batch.n_tokens < n_batch; i++) {
234
+ text_batch.n_tokens++;
235
+ text_batch.token [i] = tokens[i];
236
+ text_batch.pos [i] = n_past++;
237
+ text_batch.n_seq_id[i] = 1;
238
+ text_batch.seq_id [i][0] = seq_id;
239
+ text_batch.logits [i] = false;
240
+ }
241
+ bool is_last_token = (i == n_tokens);
242
+ if (logits_last && is_last_token) {
243
+ text_batch.logits[text_batch.n_tokens - 1] = true;
244
+ }
245
+ ret = llama_decode(lctx, text_batch);
246
+ if (ret != 0) {
247
+ LOG_ERR("failed to decode text\n");
248
+ llama_batch_free(text_batch);
249
+ return ret;
250
+ }
251
+ *new_n_past += text_batch.n_tokens;
252
+ }
253
+
254
+ } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
255
+ const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
256
+ int64_t t0 = ggml_time_ms();
257
+
258
+ LOG_INF("encoding image or slice...\n");
259
+
260
+ ret = mtmd_encode(ctx, image_tokens);
261
+ if (ret != 0) {
262
+ LOG_ERR("failed to encode image\n");
263
+ llama_batch_free(text_batch);
264
+ return ret;
265
+ }
266
+
267
+ LOG_INF("image/slice encoded in %" PRId64 " ms\n", ggml_time_ms() - t0);
268
+
269
+ float * embd = mtmd_get_output_embd(ctx);
270
+ ret = mtmd_helper_decode_image_chunk(ctx, lctx, chunk, embd, n_past, seq_id, n_batch, new_n_past);
271
+ if (ret != 0) {
272
+ LOG_ERR("failed to decode image\n");
273
+ llama_batch_free(text_batch);
274
+ return ret;
275
+ }
276
+ } else {
277
+ GGML_ABORT("chunk type not supported");
278
+ }
279
+
280
+ return 0;
281
+ }
282
+
283
+ int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
284
+ struct llama_context * lctx,
285
+ const mtmd_input_chunks * chunks,
286
+ llama_pos n_past,
287
+ llama_seq_id seq_id,
288
+ int32_t n_batch,
289
+ bool logits_last,
290
+ llama_pos * new_n_past) {
291
+ size_t n_chunks = mtmd_input_chunks_size(chunks);
292
+ if (n_chunks == 0) {
293
+ LOG_ERR("no chunks to eval\n");
294
+ return 0;
295
+ }
296
+
297
+ for (size_t i = 0; i < n_chunks; i++) {
298
+ bool chunk_logits_last = (i == n_chunks - 1) && logits_last;
299
+ auto chunk = mtmd_input_chunks_get(chunks, i);
300
+
301
+ int32_t res = mtmd_helper_eval_chunk_single(ctx, lctx, chunk, n_past, seq_id, n_batch, chunk_logits_last, &n_past);
302
+ if (res != 0) {
303
+ LOG_ERR("failed to eval chunk %zu\n", i);
304
+ return res;
305
+ }
306
+ *new_n_past = n_past;
307
+ }
308
+
309
+ return 0;
310
+ }