@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
@@ -0,0 +1,678 @@
1
+ #include "clip.h"
2
+ #include "clip-impl.h"
3
+ #include "mtmd.h"
4
+
5
+ #include "llama.h"
6
+
7
+ #include <algorithm>
8
+ #include <cerrno>
9
+ #include <cstdio>
10
+ #include <cstdlib>
11
+ #include <cstring>
12
+ #include <limits>
13
+ #include <vector>
14
+
15
+ // represents raw image data, layout is RGBRGBRGB...
16
+ // length of data must be nx * ny * 3
17
+ struct mtmd_bitmap {
18
+ uint32_t nx;
19
+ uint32_t ny;
20
+ std::vector<unsigned char> data;
21
+ std::string id; // optional user-defined id, for ex: can be set to image hash, useful for KV cache tracking
22
+ };
23
+
24
+ struct mtmd_image_tokens_deleter {
25
+ void operator()(mtmd_image_tokens * val); // forward declaration
26
+ };
27
+ using mtmd_image_tokens_ptr = std::unique_ptr<mtmd_image_tokens, mtmd_image_tokens_deleter>;
28
+
29
+ struct mtmd_input_chunk {
30
+ mtmd_input_chunk_type type;
31
+ std::vector<llama_token> tokens_text;
32
+ mtmd_image_tokens_ptr tokens_image;
33
+ };
34
+
35
+ struct mtmd_input_chunks {
36
+ std::vector<mtmd_input_chunk> entries;
37
+ };
38
+
39
+ // slice template, used by some llava-uhd models to correctly place the special tokens around image embeddings
40
+ // models not having it (llava-1.6) will process embeddings without any special tokens in-between
41
+ enum mtmd_slice_tmpl {
42
+ MTMD_SLICE_TMPL_NONE,
43
+ MTMD_SLICE_TMPL_MINICPMV_2_5,
44
+ MTMD_SLICE_TMPL_MINICPMV_2_6,
45
+ // TODO @ngxson : add support for idefics (SmolVLM)
46
+ };
47
+
48
+ mtmd_context_params mtmd_context_params_default() {
49
+ mtmd_context_params params;
50
+ params.use_gpu = true;
51
+ params.print_timings = true;
52
+ params.n_threads = 4;
53
+ params.verbosity = GGML_LOG_LEVEL_INFO;
54
+ params.image_marker = MTMD_DEFAULT_IMAGE_MARKER;
55
+ return params;
56
+ }
57
+
58
+ struct mtmd_context {
59
+ struct clip_ctx * ctx_clip;
60
+ const struct llama_model * text_model;
61
+ std::vector<float> image_embd_v; // image embedding vector
62
+
63
+ bool print_timings;
64
+ int n_threads;
65
+ std::string image_marker;
66
+
67
+ // for minicpmv, we need special tokens in-between slices
68
+ mtmd_slice_tmpl slice_tmpl = MTMD_SLICE_TMPL_NONE;
69
+ llama_token tok_ov_img_start = LLAMA_TOKEN_NULL; // overview image
70
+ llama_token tok_ov_img_end = LLAMA_TOKEN_NULL; // overview image
71
+ llama_token tok_slices_start = LLAMA_TOKEN_NULL; // start of all slices
72
+ llama_token tok_slices_end = LLAMA_TOKEN_NULL; // end of all slices
73
+ llama_token tok_sli_img_start = LLAMA_TOKEN_NULL; // single slice
74
+ llama_token tok_sli_img_end = LLAMA_TOKEN_NULL; // single slice
75
+ llama_token tok_row_end = LLAMA_TOKEN_NULL; // end of row
76
+
77
+ bool use_mrope = false; // for Qwen2VL, we need to use M-RoPE
78
+
79
+ // TODO @ngxson : add timings
80
+
81
+ mtmd_context(const char * mmproj_fname,
82
+ const llama_model * text_model,
83
+ const mtmd_context_params & ctx_params) :
84
+ text_model (text_model),
85
+ print_timings(ctx_params.print_timings),
86
+ n_threads (ctx_params.n_threads),
87
+ image_marker (ctx_params.image_marker)
88
+ {
89
+ clip_context_params ctx_clip_params;
90
+ ctx_clip_params.use_gpu = ctx_params.use_gpu;
91
+ ctx_clip_params.verbosity = ctx_params.verbosity;
92
+ ctx_clip = clip_init(mmproj_fname, ctx_clip_params);
93
+ if (!ctx_clip) {
94
+ throw std::runtime_error(string_format("Failed to load CLIP model from %s\n", mmproj_fname));
95
+ }
96
+
97
+ use_mrope = clip_is_qwen2vl(ctx_clip);
98
+
99
+ int minicpmv_version = clip_is_minicpmv(ctx_clip);
100
+ if (minicpmv_version == 2) {
101
+ // minicpmv 2.5 format:
102
+ // <image> (overview) </image><slice><image> (slice) </image><image> (slice) </image>\n ... </slice>
103
+ slice_tmpl = MTMD_SLICE_TMPL_MINICPMV_2_5;
104
+ tok_ov_img_start = lookup_token("<image>");
105
+ tok_ov_img_end = lookup_token("</image>");
106
+ tok_slices_start = lookup_token("<slice>");
107
+ tok_slices_end = lookup_token("</slice>");
108
+ tok_sli_img_start = tok_ov_img_start;
109
+ tok_sli_img_end = tok_ov_img_end;
110
+ tok_row_end = lookup_token("\n");
111
+
112
+ } else if (minicpmv_version == 3 || minicpmv_version == 4) {
113
+ // minicpmv 2.6 format:
114
+ // <image> (overview) </image><slice> (slice) </slice><slice> (slice) </slice>\n ...
115
+ slice_tmpl = MTMD_SLICE_TMPL_MINICPMV_2_6;
116
+ tok_ov_img_start = lookup_token("<image>");
117
+ tok_ov_img_end = lookup_token("</image>");
118
+ tok_sli_img_start = lookup_token("<slice>");
119
+ tok_sli_img_end = lookup_token("</slice>");
120
+ tok_row_end = lookup_token("\n");
121
+
122
+ } else if (minicpmv_version != 0) {
123
+ GGML_ASSERT(false && "unsupported minicpmv version");
124
+ }
125
+ }
126
+
127
+ ~mtmd_context() {
128
+ clip_free(ctx_clip);
129
+ }
130
+
131
+ private:
132
+ llama_token lookup_token(const std::string & token_text) {
133
+ const llama_vocab * vocab = llama_model_get_vocab(text_model);
134
+ const int n_vocab = llama_vocab_n_tokens(vocab);
135
+ for (int i = 0; i < n_vocab; i++) {
136
+ if (token_to_piece(vocab, i, true) == token_text) {
137
+ return i;
138
+ }
139
+ }
140
+ return LLAMA_TOKEN_NULL;
141
+ }
142
+
143
+ std::string token_to_piece(const llama_vocab * vocab, llama_token token, bool special) {
144
+ std::string piece;
145
+ piece.resize(piece.capacity()); // using string internal cache, 15 bytes + '\n'
146
+ const int n_chars = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
147
+ if (n_chars < 0) {
148
+ piece.resize(-n_chars);
149
+ int check = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
150
+ GGML_ASSERT(check == -n_chars);
151
+ } else {
152
+ piece.resize(n_chars);
153
+ }
154
+ return piece;
155
+ }
156
+ };
157
+
158
+ struct mtmd_image_tokens_data {
159
+ clip_image_f32_batch batch_f32; // preprocessed image patches
160
+ };
161
+
162
+ struct mtmd_image_tokens {
163
+ uint32_t nx; // number of tokens in x direction
164
+ uint32_t ny; // number of tokens in y direction
165
+ bool use_mrope_pos = false; // use M-RoPE position counting (the whole image is 1 temporal position)
166
+ uint32_t n_tokens() const { return nx * ny; }
167
+ clip_image_f32_batch batch_f32; // preprocessed image patches
168
+ std::string id; // optional user-defined ID, useful for KV cache tracking
169
+
170
+ mtmd_image_tokens clone() {
171
+ return mtmd_image_tokens{
172
+ nx,
173
+ ny,
174
+ use_mrope_pos,
175
+ batch_f32.clone(),
176
+ id
177
+ };
178
+ }
179
+ };
180
+
181
+ mtmd_context * mtmd_init_from_file(const char * mmproj_fname,
182
+ const struct llama_model * text_model,
183
+ const struct mtmd_context_params ctx_params) {
184
+ try {
185
+ return new mtmd_context(mmproj_fname, text_model, ctx_params);
186
+ } catch (const std::exception & e) {
187
+ LOG_ERR("%s: error: %s\n", __func__, e.what());
188
+ return nullptr;
189
+ }
190
+ }
191
+
192
+ void mtmd_free(mtmd_context * ctx) {
193
+ if (ctx) {
194
+ delete ctx;
195
+ }
196
+ }
197
+
198
+ // copied from common_tokenize
199
+ static std::vector<llama_token> mtmd_tokenize_text_internal(
200
+ const struct llama_vocab * vocab,
201
+ const std::string & text,
202
+ bool add_special,
203
+ bool parse_special) {
204
+ // upper limit for the number of tokens
205
+ int n_tokens = text.length() + 2 * add_special;
206
+ std::vector<llama_token> result(n_tokens);
207
+ n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
208
+ if (n_tokens < 0) {
209
+ result.resize(-n_tokens);
210
+ int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
211
+ GGML_ASSERT(check == -n_tokens);
212
+ } else {
213
+ result.resize(n_tokens);
214
+ }
215
+ return result;
216
+ }
217
+
218
+ int32_t mtmd_tokenize(mtmd_context * ctx,
219
+ mtmd_input_chunks * output,
220
+ const mtmd_input_text * text,
221
+ const mtmd_bitmap ** bitmaps,
222
+ size_t n_bitmaps) {
223
+ auto vocab = llama_model_get_vocab(ctx->text_model);
224
+
225
+ std::string prompt_modified(text->text);
226
+ std::string marker_modified(ctx->image_marker);
227
+ projector_type proj_type = clip_get_projector_type(ctx->ctx_clip);
228
+
229
+ // a bit hacky here, but works for now
230
+ // for some models, we need to add prefix and suffix to the image embeddings
231
+ if (clip_is_gemma3(ctx->ctx_clip)) {
232
+ // gemma 3
233
+ // <start_of_image> ... (image embeddings) ... <end_of_image>
234
+ marker_modified = "<start_of_image>" + ctx->image_marker + "<end_of_image>";
235
+ string_replace_all(prompt_modified, ctx->image_marker, marker_modified);
236
+
237
+ } else if (proj_type == PROJECTOR_TYPE_IDEFICS3) {
238
+ // https://github.com/huggingface/transformers/blob/a42ba80fa520c784c8f11a973ca9034e5f859b79/src/transformers/models/idefics3/processing_idefics3.py#L192-L215
239
+ marker_modified = "<fake_token_around_image><global-img>" + ctx->image_marker + "<fake_token_around_image>";
240
+ string_replace_all(prompt_modified, ctx->image_marker, marker_modified);
241
+
242
+ } else if (proj_type == PROJECTOR_TYPE_PIXTRAL) {
243
+ // https://github.com/huggingface/transformers/blob/1cd110c6cb6a6237614130c470e9a902dbc1a4bd/docs/source/en/model_doc/pixtral.md
244
+ marker_modified = ctx->image_marker + "[IMG_END]";
245
+ string_replace_all(prompt_modified, ctx->image_marker, marker_modified);
246
+ }
247
+
248
+ else if (proj_type == PROJECTOR_TYPE_QWEN2VL || proj_type == PROJECTOR_TYPE_QWEN25VL) {
249
+ // <|vision_start|> ... (image embeddings) ... <|vision_end|>
250
+ marker_modified = "<|vision_start|>" + ctx->image_marker + "<|vision_end|>";
251
+ string_replace_all(prompt_modified, ctx->image_marker, marker_modified);
252
+
253
+ }
254
+
255
+ else if (proj_type == PROJECTOR_TYPE_INTERNVL) {
256
+ // <img> ... (image embeddings) ... </img>
257
+ marker_modified = "<img>" + ctx->image_marker + "</img>";
258
+ string_replace_all(prompt_modified, ctx->image_marker, marker_modified);
259
+
260
+ }
261
+
262
+ // llava-1.5, llava-1.6, Yi-VL, Yi-34B, granite: don't need to add prefix and suffix
263
+ // for glm-edge, BOI and EOI token's embeddings are not present in the text model
264
+
265
+ std::vector<std::string> parts = string_split_str(prompt_modified, ctx->image_marker);
266
+ output->entries.clear();
267
+ output->entries.reserve(parts.size());
268
+
269
+ size_t i_img = 0;
270
+
271
+ // utility for adding raw tokens
272
+ auto add_text_chunk = [&output](std::vector<llama_token> && tokens) {
273
+ mtmd_input_chunk chunk{
274
+ MTMD_INPUT_CHUNK_TYPE_TEXT,
275
+ std::move(tokens),
276
+ {},
277
+ };
278
+ output->entries.emplace_back(std::move(chunk));
279
+ };
280
+
281
+ // utility for splitting batch of multiple images into chunks of batch having single images
282
+ auto split_batch_to_chunk = [&ctx](clip_image_f32_batch && batch_f32, const std::string & id) {
283
+ std::vector<mtmd_input_chunk> chunks;
284
+
285
+ for (auto & entry : batch_f32.entries) {
286
+ mtmd_image_tokens_ptr image_tokens(new mtmd_image_tokens);
287
+ image_tokens->nx = clip_n_output_tokens(ctx->ctx_clip, entry.get());
288
+ image_tokens->ny = 1;
289
+ image_tokens->batch_f32.entries.push_back(std::move(entry));
290
+ image_tokens->id = id;
291
+
292
+ mtmd_input_chunk chunk{
293
+ MTMD_INPUT_CHUNK_TYPE_IMAGE,
294
+ {},
295
+ std::move(image_tokens),
296
+ };
297
+ chunks.emplace_back(std::move(chunk));
298
+ }
299
+
300
+ return chunks;
301
+ };
302
+
303
+ for (const auto & part : parts) {
304
+ // printf("tokenizing part: %s\n", part.c_str());
305
+ bool add_bos = &parts.front() == &part;
306
+ auto tokens = mtmd_tokenize_text_internal(vocab, part, text->add_special && add_bos, text->parse_special);
307
+ if (tokens.empty()) {
308
+ continue;
309
+ }
310
+ mtmd_input_chunk chunk{
311
+ MTMD_INPUT_CHUNK_TYPE_TEXT,
312
+ std::move(tokens),
313
+ {},
314
+ };
315
+ output->entries.emplace_back(std::move(chunk));
316
+
317
+ if (&parts.back() != &part) {
318
+ // add image token to middle of 2 parts
319
+
320
+ if (i_img >= n_bitmaps) {
321
+ LOG_ERR("%s: error: not enough images for %d parts\n", __func__, (int)parts.size());
322
+ return 1;
323
+ }
324
+
325
+ // convert mtmd_bitmap to clip_image_u8
326
+ clip_image_u8_ptr img_u8(clip_image_u8_init());
327
+ img_u8->nx = bitmaps[i_img]->nx;
328
+ img_u8->ny = bitmaps[i_img]->ny;
329
+ img_u8->buf.resize(bitmaps[i_img]->data.size());
330
+ std::memcpy(img_u8->buf.data(), bitmaps[i_img]->data.data(), img_u8->nx * img_u8->ny * 3);
331
+ clip_image_size img_u8_size{img_u8->nx, img_u8->ny};
332
+
333
+ // preprocess image
334
+ clip_image_f32_batch batch_f32;
335
+ bool ok = clip_image_preprocess(ctx->ctx_clip, img_u8.get(), &batch_f32);
336
+ if (!ok) {
337
+ LOG_ERR("Unable to preprocess image\n");
338
+ return 2;
339
+ }
340
+
341
+ if (ctx->slice_tmpl == MTMD_SLICE_TMPL_MINICPMV_2_5 || ctx->slice_tmpl == MTMD_SLICE_TMPL_MINICPMV_2_6) {
342
+ // split batch into chunks of single images
343
+ auto chunks = split_batch_to_chunk(std::move(batch_f32), bitmaps[i_img]->id);
344
+ GGML_ASSERT(chunks.size() > 0);
345
+
346
+ // add overview image
347
+ add_text_chunk({ctx->tok_ov_img_start});
348
+ output->entries.emplace_back(std::move(chunks.front()));
349
+ chunks.erase(chunks.begin());
350
+ add_text_chunk({ctx->tok_ov_img_end});
351
+
352
+ // add slices
353
+ if (!chunks.empty()) {
354
+ clip_add_load_image_size(ctx->ctx_clip, &img_u8_size);
355
+ int n_col = clip_uhd_num_image_embeds_col(ctx->ctx_clip);
356
+ int n_row = (int)chunks.size() / n_col;
357
+ GGML_ASSERT(n_row * n_col == (int)chunks.size());
358
+ if (ctx->tok_slices_start != LLAMA_TOKEN_NULL) {
359
+ add_text_chunk({ctx->tok_slices_start});
360
+ }
361
+ for (int y = 0; y < n_row; y++) {
362
+ for (int x = 0; x < n_col; x++) {
363
+ if (ctx->tok_sli_img_start != LLAMA_TOKEN_NULL) {
364
+ add_text_chunk({ctx->tok_sli_img_start});
365
+ }
366
+ output->entries.emplace_back(std::move(chunks[y * n_col + x]));
367
+ if (ctx->tok_sli_img_end != LLAMA_TOKEN_NULL) {
368
+ add_text_chunk({ctx->tok_sli_img_end});
369
+ }
370
+ }
371
+ if (ctx->tok_row_end != LLAMA_TOKEN_NULL && y != n_row - 1) {
372
+ add_text_chunk({ctx->tok_row_end});
373
+ }
374
+ }
375
+ if (ctx->tok_slices_end != LLAMA_TOKEN_NULL) {
376
+ add_text_chunk({ctx->tok_slices_end});
377
+ }
378
+ }
379
+
380
+ } else {
381
+ size_t n_tokens = 0;
382
+ for (const auto & entry : batch_f32.entries) {
383
+ n_tokens += clip_n_output_tokens(ctx->ctx_clip, entry.get());
384
+ }
385
+
386
+ mtmd_image_tokens_ptr image_tokens(new mtmd_image_tokens);
387
+ if (ctx->use_mrope) {
388
+ // for Qwen2VL, we need this information for M-RoPE decoding positions
389
+ image_tokens->nx = clip_n_output_tokens_x(ctx->ctx_clip, batch_f32.entries[0].get());
390
+ image_tokens->ny = clip_n_output_tokens_y(ctx->ctx_clip, batch_f32.entries[0].get());
391
+ image_tokens->use_mrope_pos = true;
392
+ } else {
393
+ // other models, we only need the total number of tokens
394
+ image_tokens->nx = n_tokens;
395
+ image_tokens->ny = 1;
396
+ }
397
+ image_tokens->batch_f32 = std::move(batch_f32);
398
+ image_tokens->id = bitmaps[i_img]->id; // optional
399
+
400
+ LOG_DBG("image_tokens->nx = %d\n", image_tokens->nx);
401
+ LOG_DBG("image_tokens->ny = %d\n", image_tokens->ny);
402
+ LOG_DBG("batch_f32 size = %d\n", (int)image_tokens->batch_f32.entries.size());
403
+
404
+ mtmd_input_chunk chunk{
405
+ MTMD_INPUT_CHUNK_TYPE_IMAGE,
406
+ {},
407
+ std::move(image_tokens),
408
+ };
409
+ output->entries.emplace_back(std::move(chunk));
410
+ }
411
+
412
+ i_img++; // move to next image
413
+ }
414
+ }
415
+
416
+ return 0;
417
+ }
418
+
419
+ static void mtmd_image_tokens_free(mtmd_image_tokens * image_tokens) {
420
+ if (image_tokens) {
421
+ delete image_tokens;
422
+ }
423
+ }
424
+
425
+ int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens) {
426
+ int n_mmproj_embd = clip_n_mmproj_embd(ctx->ctx_clip);
427
+ ctx->image_embd_v.resize(image_tokens->n_tokens() * n_mmproj_embd);
428
+ bool ok = false;
429
+
430
+ // only effective for minicpmv and qwen2vl, other models will ignore load_image_size
431
+ {
432
+ clip_image_size slice_size{
433
+ image_tokens->batch_f32.entries[0]->nx,
434
+ image_tokens->batch_f32.entries[0]->ny};
435
+ clip_add_load_image_size(ctx->ctx_clip, &slice_size);
436
+ }
437
+
438
+ if (clip_is_llava(ctx->ctx_clip) || clip_is_minicpmv(ctx->ctx_clip) || clip_is_glm(ctx->ctx_clip)) {
439
+ // TODO @ngxson : llava does not support batched encoding ; this should be fixed inside clip_image_batch_encode()
440
+ const auto & entries = image_tokens->batch_f32.entries;
441
+ for (size_t i = 0; i < entries.size(); i++) {
442
+ int n_tokens_per_image = clip_n_output_tokens(ctx->ctx_clip, entries[i].get());
443
+ ok = clip_image_encode(
444
+ ctx->ctx_clip,
445
+ ctx->n_threads,
446
+ entries[i].get(),
447
+ ctx->image_embd_v.data() + i*n_mmproj_embd*n_tokens_per_image);
448
+ }
449
+ } else {
450
+ ok = clip_image_batch_encode(
451
+ ctx->ctx_clip,
452
+ ctx->n_threads,
453
+ &image_tokens->batch_f32,
454
+ ctx->image_embd_v.data());
455
+ }
456
+
457
+ return ok ? 0 : 1;
458
+ }
459
+
460
+ float * mtmd_get_output_embd(mtmd_context * ctx) {
461
+ return ctx->image_embd_v.data();
462
+ }
463
+
464
+ bool mtmd_decode_use_non_causal(mtmd_context * ctx) {
465
+ projector_type proj_type = clip_get_projector_type(ctx->ctx_clip);
466
+ if (proj_type == PROJECTOR_TYPE_GEMMA3) {
467
+ return true;
468
+ }
469
+ return false;
470
+ }
471
+
472
+ bool mtmd_decode_use_mrope(mtmd_context * ctx) {
473
+ return ctx->use_mrope;
474
+ }
475
+
476
// Deleter used by mtmd_image_tokens_ptr (a unique_ptr alias) so that the
// smart pointer releases image tokens through the library's free function.
void mtmd_image_tokens_deleter::operator()(mtmd_image_tokens * val) {
    mtmd_image_tokens_free(val);
}
479
+
480
// these 2 helpers below use the internal clip_image_u8_ptr,
// so unfortunately they cannot be moved to mtmd-helper.h
// however, in theory, the user can decode an image file to a bitmap using
// whichever library they want, and then use mtmd_bitmap_init() to create the bitmap
484
+
485
+ mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(const unsigned char * buf, size_t len) {
486
+ clip_image_u8_ptr img_u8(clip_image_u8_init());
487
+ bool ok = clip_image_load_from_bytes(buf, len, img_u8.get());
488
+ if (!ok) {
489
+ LOG_ERR("Unable to load image from buffer\n");
490
+ return nullptr;
491
+ }
492
+ uint32_t nx, ny;
493
+ unsigned char * data = clip_image_u8_get_data(img_u8.get(), &nx, &ny);
494
+ return mtmd_bitmap_init(nx, ny, data);
495
+ }
496
+
497
+ mtmd_bitmap * mtmd_helper_bitmap_init_from_file(const char * fname) {
498
+ clip_image_u8_ptr img_u8(clip_image_u8_init());
499
+ bool ok = clip_image_load_from_file(fname, img_u8.get());
500
+ if (!ok) {
501
+ LOG_ERR("Unable to load image %s\n", fname);
502
+ return nullptr;
503
+ }
504
+ uint32_t nx, ny;
505
+ unsigned char * data = clip_image_u8_get_data(img_u8.get(), &nx, &ny);
506
+ return mtmd_bitmap_init(nx, ny, data);
507
+ }
508
+
509
+ //
510
+ // public API functions
511
+ //
512
+
513
+ // mtmd_bitmap
514
+
515
+ mtmd_bitmap * mtmd_bitmap_init(uint32_t nx,
516
+ uint32_t ny,
517
+ const unsigned char * data) {
518
+ mtmd_bitmap * bitmap = new mtmd_bitmap;
519
+ bitmap->nx = nx;
520
+ bitmap->ny = ny;
521
+ size_t data_size = (size_t)nx * ny * 3;
522
+ bitmap->data.resize(data_size);
523
+ std::memcpy(bitmap->data.data(), data, data_size);
524
+ return bitmap;
525
+ }
526
+
527
+ uint32_t mtmd_bitmap_get_nx(const mtmd_bitmap * bitmap) {
528
+ return bitmap->nx;
529
+ }
530
+
531
+ uint32_t mtmd_bitmap_get_ny(const mtmd_bitmap * bitmap) {
532
+ return bitmap->ny;
533
+ }
534
+
535
+ const unsigned char * mtmd_bitmap_get_data(const mtmd_bitmap * bitmap) {
536
+ return bitmap->data.data();
537
+ }
538
+
539
+ const char * mtmd_bitmap_get_id(const mtmd_bitmap * bitmap) {
540
+ return bitmap->id.c_str();
541
+ }
542
+
543
+ void mtmd_bitmap_set_id(mtmd_bitmap * bitmap, const char * id) {
544
+ if (id) {
545
+ bitmap->id = std::string(id);
546
+ } else {
547
+ bitmap->id.clear();
548
+ }
549
+ }
550
+
551
+ void mtmd_bitmap_free(mtmd_bitmap * bitmap) {
552
+ if (bitmap) {
553
+ delete bitmap;
554
+ }
555
+ }
556
+
557
+ // mtmd_input_chunks
558
+
559
+ mtmd_input_chunks * mtmd_input_chunks_init() {
560
+ return new mtmd_input_chunks;
561
+ }
562
+
563
+ size_t mtmd_input_chunks_size(const mtmd_input_chunks * chunks) {
564
+ return chunks->entries.size();
565
+ }
566
+
567
+ const mtmd_input_chunk * mtmd_input_chunks_get(const mtmd_input_chunks * chunks, size_t idx) {
568
+ if (idx >= chunks->entries.size()) {
569
+ return nullptr;
570
+ }
571
+ return &chunks->entries[idx];
572
+ }
573
+
574
+ void mtmd_input_chunks_free(mtmd_input_chunks * chunks) {
575
+ if (chunks) {
576
+ delete chunks;
577
+ }
578
+ }
579
+
580
+ // mtmd_input_chunk
581
+
582
+ enum mtmd_input_chunk_type mtmd_input_chunk_get_type(const mtmd_input_chunk * chunk) {
583
+ return chunk->type;
584
+ }
585
+
586
+ const llama_token * mtmd_input_chunk_get_tokens_text(const mtmd_input_chunk * chunk, size_t * n_tokens_output) {
587
+ if (chunk->type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
588
+ *n_tokens_output = chunk->tokens_text.size();
589
+ return chunk->tokens_text.data();
590
+ }
591
+ *n_tokens_output = 0;
592
+ return nullptr;
593
+ }
594
+
595
+ const mtmd_image_tokens * mtmd_input_chunk_get_tokens_image(const mtmd_input_chunk * chunk) {
596
+ if (chunk->type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
597
+ return chunk->tokens_image.get();
598
+ }
599
+ return nullptr;
600
+ }
601
+
602
+ mtmd_input_chunk * mtmd_input_chunk_copy(const mtmd_input_chunk * chunk) {
603
+ mtmd_input_chunk * copy = new mtmd_input_chunk{
604
+ chunk->type,
605
+ chunk->tokens_text,
606
+ mtmd_image_tokens_ptr(),
607
+ };
608
+ if (chunk->tokens_image) {
609
+ // copy the image tokens
610
+ copy->tokens_image = mtmd_image_tokens_ptr(new mtmd_image_tokens());
611
+ *copy->tokens_image = chunk->tokens_image->clone();
612
+ }
613
+ return copy;
614
+ }
615
+
616
+ void mtmd_input_chunk_free(mtmd_input_chunk * chunk) {
617
+ if (chunk) {
618
+ delete chunk;
619
+ }
620
+ }
621
+
622
+ // mtmd_image_tokens
623
+
624
+ size_t mtmd_image_tokens_get_n_tokens(const mtmd_image_tokens * image_tokens) {
625
+ return image_tokens->n_tokens();
626
+ }
627
+
628
+ size_t mtmd_image_tokens_get_nx(const mtmd_image_tokens * image_tokens) {
629
+ return image_tokens->nx;
630
+ }
631
+
632
+ size_t mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens) {
633
+ return image_tokens->ny;
634
+ }
635
+
636
+ const char * mtmd_image_tokens_get_id(const mtmd_image_tokens * image_tokens) {
637
+ return image_tokens->id.c_str();
638
+ }
639
+
640
+ llama_pos mtmd_image_tokens_get_n_pos(const mtmd_image_tokens * image_tokens) {
641
+ if (image_tokens->use_mrope_pos) {
642
+ return 1; // for M-RoPE, the whole image is 1 in temporal dimension
643
+ }
644
+ return image_tokens->n_tokens();
645
+ }
646
+
647
+ // test function
648
+
649
+ mtmd_input_chunks * mtmd_test_create_input_chunks() {
650
+ mtmd_input_chunks * chunks = mtmd_input_chunks_init();
651
+ if (!chunks) {
652
+ return nullptr;
653
+ }
654
+
655
+ // create a text chunk
656
+ std::vector<llama_token> tokens_text = { 1, 2, 3, 4, 5 };
657
+ mtmd_input_chunk chunk_text{
658
+ MTMD_INPUT_CHUNK_TYPE_TEXT,
659
+ std::move(tokens_text),
660
+ {},
661
+ };
662
+ chunks->entries.emplace_back(std::move(chunk_text));
663
+
664
+ // create an image chunk
665
+ mtmd_image_tokens_ptr image_tokens(new mtmd_image_tokens);
666
+ image_tokens->nx = 4;
667
+ image_tokens->ny = 4;
668
+ image_tokens->batch_f32.entries.resize(16);
669
+ image_tokens->id = "image_1";
670
+ mtmd_input_chunk chunk_image{
671
+ MTMD_INPUT_CHUNK_TYPE_IMAGE,
672
+ {},
673
+ std::move(image_tokens),
674
+ };
675
+ chunks->entries.emplace_back(std::move(chunk_image));
676
+
677
+ return chunks;
678
+ }