local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (626)
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,521 @@
1
+ // fix problem with std::min and std::max
2
+ #if defined(_WIN32)
3
+ #define WIN32_LEAN_AND_MEAN
4
+ #ifndef NOMINMAX
5
+ # define NOMINMAX
6
+ #endif
7
+ #include <windows.h>
8
+ #endif
9
+
10
+ #include "mtmd.h"
11
+ #include "mtmd-helper.h"
12
+ #include "llama.h"
13
+
14
+ #include <algorithm>
15
+ #include <cinttypes>
16
+ #include <vector>
17
+
18
+ //#define MTMD_AUDIO_DEBUG
19
+
20
+ #define MINIAUDIO_IMPLEMENTATION
21
+ #ifndef MTMD_AUDIO_DEBUG
22
+ # define MA_NO_ENCODING
23
+ #endif
24
+ #define MA_NO_DEVICE_IO
25
+ #define MA_NO_RESOURCE_MANAGER
26
+ #define MA_NO_NODE_GRAPH
27
+ #define MA_NO_ENGINE
28
+ #define MA_NO_GENERATION
29
+ #define MA_API static
30
+ #include "miniaudio/miniaudio.h"
31
+
32
+ #define STB_IMAGE_IMPLEMENTATION
33
+ #include "stb/stb_image.h"
34
+
35
+ #ifdef MTMD_INTERNAL_HEADER
36
+ #error "mtmd-helper is a public library outside of mtmd. it must not include internal headers"
37
+ #endif
38
+
39
+ //
40
+ // internal logging functions
41
+ //
42
+
43
+ struct mtmd_helper_logger {
44
+ ggml_log_callback default_callback = [](ggml_log_level level, const char * text, void * user_data) {
45
+ (void) level;
46
+ (void) user_data;
47
+ fputs(text, stderr);
48
+ fflush(stderr);
49
+ };
50
+
51
+ ggml_log_callback log_callback = default_callback;
52
+ void * log_callback_user_data;
53
+
54
+ void log_v(enum ggml_log_level level, const char * format, va_list args) {
55
+ if (format == NULL) {
56
+ return;
57
+ }
58
+ va_list args_copy;
59
+ va_copy(args_copy, args);
60
+ char buffer[128];
61
+ int len = vsnprintf(buffer, 128, format, args);
62
+ if (len < 128) {
63
+ log_callback(level, buffer, log_callback_user_data);
64
+ } else {
65
+ char * buffer2 = (char *) calloc(len + 1, sizeof(char));
66
+ vsnprintf(buffer2, len + 1, format, args_copy);
67
+ buffer2[len] = 0;
68
+ log_callback(level, buffer2, log_callback_user_data);
69
+ free(buffer2);
70
+ }
71
+ va_end(args_copy);
72
+ }
73
+
74
+ void log(enum ggml_log_level level, const char * format, ...) {
75
+ va_list args;
76
+ va_start(args, format);
77
+ log_v(level, format, args);
78
+ va_end(args);
79
+ }
80
+ } g_logger;
81
+
82
+ #define LOG_INF(...) g_logger.log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
83
+ #define LOG_WRN(...) g_logger.log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
84
+ #define LOG_ERR(...) g_logger.log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
85
+
86
+ void mtmd_helper_log_set(ggml_log_callback log_callback, void * user_data) {
87
+ if (log_callback == nullptr) {
88
+ log_callback = g_logger.default_callback;
89
+ }
90
+ g_logger.log_callback = log_callback;
91
+ g_logger.log_callback_user_data = user_data;
92
+ mtmd_log_set(log_callback, user_data);
93
+ }
94
+
95
+ //
96
+ // helper functions
97
+ //
98
+
99
+ size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks) {
100
+ size_t n_tokens = 0;
101
+ for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
102
+ auto chunk = mtmd_input_chunks_get(chunks, i);
103
+ n_tokens += mtmd_input_chunk_get_n_tokens(chunk);
104
+ }
105
+ return n_tokens;
106
+ }
107
+
108
+ llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks) {
109
+ llama_pos n_pos = 0;
110
+ for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
111
+ auto chunk = mtmd_input_chunks_get(chunks, i);
112
+ n_pos += mtmd_input_chunk_get_n_pos(chunk);
113
+ }
114
+ return n_pos;
115
+ }
116
+
117
+ // helper struct to make working with embd batch easier
118
+ // note: this will be removed after llama_batch_ext refactoring
119
+ struct decode_embd_batch {
120
+ int n_pos_per_embd;
121
+ int n_mmproj_embd;
122
+ std::vector<llama_pos> pos;
123
+ std::vector<llama_pos> pos_view; // used by mrope
124
+ std::vector<int32_t> n_seq_id;
125
+ std::vector<llama_seq_id> seq_id_0;
126
+ std::vector<llama_seq_id *> seq_ids;
127
+ std::vector<int8_t> logits;
128
+ llama_batch batch;
129
+ decode_embd_batch(float * embd, int32_t n_tokens, int n_pos_per_embd, int n_mmproj_embd) : n_pos_per_embd(n_pos_per_embd), n_mmproj_embd(n_mmproj_embd) {
130
+ pos .resize(n_tokens * n_pos_per_embd);
131
+ n_seq_id.resize(n_tokens);
132
+ seq_ids .resize(n_tokens + 1);
133
+ logits .resize(n_tokens);
134
+ seq_id_0.resize(1);
135
+ seq_ids [n_tokens] = nullptr;
136
+ batch = {
137
+ /*n_tokens =*/ n_tokens,
138
+ /*tokens =*/ nullptr,
139
+ /*embd =*/ embd,
140
+ /*pos =*/ pos.data(),
141
+ /*n_seq_id =*/ n_seq_id.data(),
142
+ /*seq_id =*/ seq_ids.data(),
143
+ /*logits =*/ logits.data(),
144
+ };
145
+ }
146
+
147
+ void set_position_normal(llama_pos pos_0, llama_seq_id seq_id) {
148
+ seq_id_0[0] = seq_id;
149
+ for (int i = 0; i < batch.n_tokens; i++) {
150
+ batch.pos [i] = pos_0 + i;
151
+ batch.n_seq_id[i] = 1;
152
+ batch.seq_id [i] = seq_id_0.data();
153
+ batch.logits [i] = false;
154
+ }
155
+ }
156
+
157
+ // M-RoPE for image
158
+ void set_position_mrope_2d(llama_pos pos_0, int nx, int ny, llama_seq_id seq_id) {
159
+ GGML_ASSERT(n_pos_per_embd == 4);
160
+ seq_id_0[0] = seq_id;
161
+ for (int y = 0; y < ny; y++) {
162
+ for (int x = 0; x < nx; x++) {
163
+ int i = y * nx + x;
164
+ pos[i ] = pos_0;
165
+ pos[i + batch.n_tokens ] = pos_0 + y;
166
+ pos[i + batch.n_tokens * 2] = pos_0 + x;
167
+ pos[i + batch.n_tokens * 3] = 0; // last pos dim is unused
168
+ }
169
+ }
170
+ for (int i = 0; i < batch.n_tokens; i++) {
171
+ batch.n_seq_id[i] = 1;
172
+ batch.seq_id [i] = seq_id_0.data();
173
+ batch.logits [i] = false;
174
+ }
175
+ }
176
+
177
+ // M-RoPE for audio
178
+ void set_position_mrope_1d(llama_pos pos_0, llama_seq_id seq_id) {
179
+ GGML_ASSERT(n_pos_per_embd == 4);
180
+ seq_id_0[0] = seq_id;
181
+ for (int i = 0; i < batch.n_tokens; i++) {
182
+ pos[i ] = pos_0 + i;
183
+ pos[i + batch.n_tokens ] = pos_0 + i;
184
+ pos[i + batch.n_tokens * 2] = pos_0 + i;
185
+ pos[i + batch.n_tokens * 3] = 0; // last pos dim is unused
186
+ }
187
+ for (int i = 0; i < batch.n_tokens; i++) {
188
+ batch.n_seq_id[i] = 1;
189
+ batch.seq_id [i] = seq_id_0.data();
190
+ batch.logits [i] = false;
191
+ }
192
+ }
193
+
194
+ llama_batch get_view(int offset, int n_tokens) {
195
+ llama_pos * pos_ptr;
196
+ pos_view.clear();
197
+ pos_view.reserve(n_tokens * n_pos_per_embd);
198
+ if (n_pos_per_embd > 1) {
199
+ // mrope
200
+ // for example, with layout of src: 1234...1234...1234...1234...
201
+ // offset 2 will give us dst: 34...34...34...34...
202
+ for (int i = 0; i < n_pos_per_embd; i++) {
203
+ // assume n_tokens is less than or equal to batch.n_tokens
204
+ // batch.n_tokens is number of **total** tokens
205
+ // n_tokens is number of viewed token
206
+ size_t src_idx = i * batch.n_tokens + offset;
207
+ pos_view.insert(pos_view.end(),
208
+ pos.data() + src_idx,
209
+ pos.data() + src_idx + n_tokens);
210
+ }
211
+ pos_ptr = pos_view.data();
212
+ } else {
213
+ // normal
214
+ pos_ptr = pos.data() + offset;
215
+ }
216
+ return {
217
+ /*n_tokens =*/ n_tokens,
218
+ /*tokens =*/ nullptr,
219
+ /*embd =*/ batch.embd + offset * n_mmproj_embd,
220
+ /*pos =*/ pos_ptr,
221
+ /*n_seq_id =*/ batch.n_seq_id + offset,
222
+ /*seq_id =*/ batch.seq_id + offset,
223
+ /*logits =*/ batch.logits + offset,
224
+ };
225
+ }
226
+ };
227
+
228
+ // Helper function for decoding an image whose embeddings have already been calculated
229
+ int32_t mtmd_helper_decode_image_chunk(
230
+ mtmd_context * ctx,
231
+ struct llama_context * lctx,
232
+ const mtmd_input_chunk * chunk,
233
+ float * encoded_embd,
234
+ llama_pos n_past,
235
+ llama_seq_id seq_id,
236
+ int32_t n_batch,
237
+ llama_pos * new_n_past) {
238
+ auto chunk_type = mtmd_input_chunk_get_type(chunk);
239
+ const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
240
+ if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
241
+ LOG_ERR("failed to decode chunk: input chunk not of image/audio type\n");
242
+ return -1;
243
+ }
244
+
245
+ const llama_model * model = llama_get_model(lctx);
246
+ int n_mmproj_embd = llama_model_n_embd_inp(model);
247
+ int n_pos_per_embd = mtmd_decode_use_mrope(ctx) ? 4 : 1;
248
+
249
+ int32_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
250
+ int32_t i_batch = 0;
251
+ int32_t n_img_batches = (n_tokens + n_batch - 1) / n_batch;
252
+ decode_embd_batch batch_embd(encoded_embd, n_tokens, n_pos_per_embd, n_mmproj_embd);
253
+
254
+ if (mtmd_decode_use_mrope(ctx)) {
255
+ if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
256
+ const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
257
+ if (!image_tokens) {
258
+ LOG_ERR("failed to decode chunk: image tokens are null\n");
259
+ return -1;
260
+ }
261
+ const int nx = mtmd_image_tokens_get_nx(image_tokens);
262
+ const int ny = mtmd_image_tokens_get_ny(image_tokens);
263
+ batch_embd.set_position_mrope_2d(n_past, nx, ny, seq_id);
264
+ } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
265
+ batch_embd.set_position_mrope_1d(n_past, seq_id);
266
+ } else {
267
+ GGML_ABORT("invalid chunk type for M-RoPE");
268
+ }
269
+ } else {
270
+ batch_embd.set_position_normal(n_past, seq_id);
271
+ }
272
+
273
+ if (mtmd_decode_use_non_causal(ctx)) {
274
+ llama_set_causal_attn(lctx, false);
275
+ // TODO @ngxson : need to make sure only one image is processed at a time, and n_ubatch must be enough to hold the image
276
+ }
277
+
278
+ while (i_batch < n_img_batches) { // split into batches
279
+ int pos_offset = i_batch*n_batch;
280
+ int n_tokens_batch = std::min(n_batch, n_tokens - pos_offset);
281
+ llama_batch batch_embd_view = batch_embd.get_view(pos_offset, n_tokens_batch);
282
+
283
+ LOG_INF("decoding %s batch %d/%d, n_tokens_batch = %d\n", name, i_batch+1, n_img_batches, n_tokens_batch);
284
+
285
+ int64_t t1 = ggml_time_ms();
286
+ int32_t ret = llama_decode(lctx, batch_embd_view);
287
+ if (ret != 0) {
288
+ LOG_ERR("failed to decode %s\n", name);
289
+ llama_set_causal_attn(lctx, true); // restore causal attn
290
+ return ret;
291
+ }
292
+
293
+ LOG_INF("%s decoded (batch %d/%d) in %" PRId64 " ms\n", name, i_batch+1, n_img_batches, ggml_time_ms() - t1);
294
+
295
+ i_batch++;
296
+ }
297
+
298
+ n_past += mtmd_input_chunk_get_n_pos(chunk);
299
+ *new_n_past = n_past;
300
+
301
+ if (mtmd_decode_use_non_causal(ctx)) {
302
+ llama_set_causal_attn(lctx, true);
303
+ }
304
+ return 0;
305
+ }
306
+
307
+ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
308
+ struct llama_context * lctx,
309
+ const mtmd_input_chunk * chunk,
310
+ llama_pos n_past,
311
+ llama_seq_id seq_id,
312
+ int32_t n_batch,
313
+ bool logits_last,
314
+ llama_pos * new_n_past) {
315
+ int32_t ret;
316
+ llama_batch text_batch = llama_batch_init(n_batch, 0, 1);
317
+ auto chunk_type = mtmd_input_chunk_get_type(chunk);
318
+
319
+ if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
320
+ size_t n_tokens;
321
+ const auto tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
322
+ // LOG_INF("decoding text chunk, n_tokens = %zu\n", n_tokens);
323
+ size_t i = 0;
324
+ while (i < n_tokens) { // split into batches
325
+ text_batch.n_tokens = 0; // clear the batch
326
+ for (; i < n_tokens && text_batch.n_tokens < n_batch; i++) {
327
+ int32_t j = text_batch.n_tokens;
328
+ text_batch.token [j] = tokens[i];
329
+ text_batch.pos [j] = n_past++;
330
+ text_batch.n_seq_id[j] = 1;
331
+ text_batch.seq_id [j][0] = seq_id;
332
+ text_batch.logits [j] = false;
333
+
334
+ text_batch.n_tokens++;
335
+ }
336
+ bool is_last_token = (i == n_tokens);
337
+ if (logits_last && is_last_token) {
338
+ text_batch.logits[text_batch.n_tokens - 1] = true;
339
+ }
340
+ ret = llama_decode(lctx, text_batch);
341
+ if (ret != 0) {
342
+ LOG_ERR("failed to decode text\n");
343
+ llama_batch_free(text_batch);
344
+ return ret;
345
+ }
346
+ *new_n_past += text_batch.n_tokens;
347
+ }
348
+
349
+ } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
350
+ const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
351
+ int64_t t0 = ggml_time_ms();
352
+
353
+ LOG_INF("encoding %s slice...\n", name);
354
+
355
+ ret = mtmd_encode_chunk(ctx, chunk);
356
+ if (ret != 0) {
357
+ LOG_ERR("failed to encode %s slice\n", name);
358
+ llama_batch_free(text_batch);
359
+ return ret;
360
+ }
361
+
362
+ LOG_INF("%s slice encoded in %" PRId64 " ms\n", name, ggml_time_ms() - t0);
363
+
364
+ float * embd = mtmd_get_output_embd(ctx);
365
+ ret = mtmd_helper_decode_image_chunk(ctx, lctx, chunk, embd, n_past, seq_id, n_batch, new_n_past);
366
+ if (ret != 0) {
367
+ LOG_ERR("failed to decode %s\n", name);
368
+ llama_batch_free(text_batch);
369
+ return ret;
370
+ }
371
+ } else {
372
+ GGML_ABORT("chunk type not supported");
373
+ }
374
+
375
+ llama_batch_free(text_batch);
376
+ return 0;
377
+ }
378
+
379
+ int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
380
+ struct llama_context * lctx,
381
+ const mtmd_input_chunks * chunks,
382
+ llama_pos n_past,
383
+ llama_seq_id seq_id,
384
+ int32_t n_batch,
385
+ bool logits_last,
386
+ llama_pos * new_n_past) {
387
+ size_t n_chunks = mtmd_input_chunks_size(chunks);
388
+ if (n_chunks == 0) {
389
+ LOG_WRN("no chunks to eval\n");
390
+ return 0;
391
+ }
392
+
393
+ for (size_t i = 0; i < n_chunks; i++) {
394
+ bool chunk_logits_last = (i == n_chunks - 1) && logits_last;
395
+ auto chunk = mtmd_input_chunks_get(chunks, i);
396
+
397
+ int32_t res = mtmd_helper_eval_chunk_single(ctx, lctx, chunk, n_past, seq_id, n_batch, chunk_logits_last, &n_past);
398
+ if (res != 0) {
399
+ LOG_ERR("failed to eval chunk %zu\n", i);
400
+ return res;
401
+ }
402
+ *new_n_past = n_past;
403
+ }
404
+
405
+ return 0;
406
+ }
407
+
408
+ namespace audio_helpers {
409
+
410
// Sniff the leading bytes of `buf` for a WAV, MP3 or FLAC signature.
// Buffers shorter than 12 bytes are never reported as audio.
static bool is_audio_file(const char * buf, size_t len) {
    // the longest signature (RIFF....WAVE) spans 12 bytes
    if (len < 12) {
        return false;
    }

    // RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
    // WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
    if (memcmp(buf, "RIFF", 4) == 0 && memcmp(buf + 8, "WAVE", 4) == 0) {
        return true;
    }

    // MP3 starting with an ID3 tag
    if (memcmp(buf, "ID3", 3) == 0) {
        return true;
    }

    // Check for MPEG sync word (simplified check)
    const unsigned char b0 = (unsigned char) buf[0];
    const unsigned char b1 = (unsigned char) buf[1];
    if (b0 == 0xFF && (b1 & 0xE0) == 0xE0) {
        return true;
    }

    // FLAC stream marker
    return memcmp(buf, "fLaC", 4) == 0;
}
427
+
428
+ // returns true if the buffer is a valid audio file
429
+ static bool decode_audio_from_buf(const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float> & pcmf32_mono) {
430
+ ma_result result;
431
+ const int channels = 1;
432
+ ma_decoder_config decoder_config = ma_decoder_config_init(ma_format_f32, channels, target_sampler_rate);
433
+ ma_decoder decoder;
434
+
435
+ result = ma_decoder_init_memory(buf_in, len, &decoder_config, &decoder);
436
+ if (result != MA_SUCCESS) {
437
+ return false;
438
+ }
439
+
440
+ ma_uint64 frame_count;
441
+ ma_uint64 frames_read;
442
+ result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count);
443
+ if (result != MA_SUCCESS) {
444
+ ma_decoder_uninit(&decoder);
445
+ return false;
446
+ }
447
+
448
+ pcmf32_mono.resize(frame_count);
449
+ result = ma_decoder_read_pcm_frames(&decoder, pcmf32_mono.data(), frame_count, &frames_read);
450
+ if (result != MA_SUCCESS) {
451
+ ma_decoder_uninit(&decoder);
452
+ return false;
453
+ }
454
+
455
+ #ifdef MTMD_AUDIO_DEBUG
456
+ // save audio to wav file
457
+ ma_encoder_config config = ma_encoder_config_init(ma_encoding_format_wav, ma_format_f32, 1, target_sampler_rate);
458
+ ma_encoder encoder;
459
+ ma_encoder_init_file("output.wav", &config, &encoder);
460
+ ma_encoder_write_pcm_frames(&encoder, pcmf32_mono.data(), pcmf32_mono.size(), &frames_read);
461
+ ma_encoder_uninit(&encoder);
462
+ #endif
463
+
464
+ ma_decoder_uninit(&decoder);
465
+ return true;
466
+ }
467
+
468
+ } // namespace audio_helpers
469
+
470
+ mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len) {
471
+ if (audio_helpers::is_audio_file((const char *)buf, len)) {
472
+ std::vector<float> pcmf32;
473
+ int bitrate = mtmd_get_audio_bitrate(ctx);
474
+ if (bitrate < 0) {
475
+ LOG_ERR("This model does not support audio input\n");
476
+ return nullptr;
477
+ }
478
+ if (!audio_helpers::decode_audio_from_buf(buf, len, bitrate, pcmf32)) {
479
+ LOG_ERR("Unable to read WAV audio file from buffer\n");
480
+ return nullptr;
481
+ }
482
+ return mtmd_bitmap_init_from_audio(pcmf32.size(), pcmf32.data());
483
+ }
484
+
485
+ // otherwise, we assume it's an image
486
+ mtmd_bitmap * result = nullptr;
487
+ {
488
+ int nx, ny, nc;
489
+ auto * data = stbi_load_from_memory(buf, len, &nx, &ny, &nc, 3);
490
+ if (!data) {
491
+ LOG_ERR("%s: failed to decode image bytes\n", __func__);
492
+ return nullptr;
493
+ }
494
+ result = mtmd_bitmap_init(nx, ny, data);
495
+ stbi_image_free(data);
496
+ }
497
+ return result;
498
+ }
499
+
500
+ mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname) {
501
+ std::vector<unsigned char> buf;
502
+ FILE * f = fopen(fname, "rb");
503
+ if (!f) {
504
+ LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno));
505
+ return nullptr;
506
+ }
507
+
508
+ fseek(f, 0, SEEK_END);
509
+ long file_size = ftell(f);
510
+ fseek(f, 0, SEEK_SET);
511
+ buf.resize(file_size);
512
+
513
+ size_t n_read = fread(buf.data(), 1, file_size, f);
514
+ fclose(f);
515
+ if (n_read != (size_t)file_size) {
516
+ LOG_ERR("Failed to read entire file %s", fname);
517
+ return nullptr;
518
+ }
519
+
520
+ return mtmd_helper_bitmap_init_from_buf(ctx, buf.data(), buf.size());
521
+ }
@@ -0,0 +1,96 @@
1
+ #ifndef MTMD_HELPER_H
2
+ #define MTMD_HELPER_H
3
+
4
+ #include "ggml.h"
5
+ #include "llama.h"
6
+ #include "mtmd.h"
7
+
8
+ #include <stddef.h>
9
+ #include <stdint.h>
10
+ #include <stdbool.h>
11
+
12
+ #ifdef __cplusplus
13
+ extern "C" {
14
+ #endif
15
+
16
+ //
17
+ // libmtmd helper functions
18
+ //
19
+ // Please note that these helpers are not guaranteed to be stable.
20
+ // BREAKING CHANGES are expected.
21
+ //
22
+
23
+ // Set callback for all future logging events.
24
+ // If this is not called, or NULL is supplied, everything is output on stderr.
25
+ // Note: this also calls mtmd_log_set() internally
26
+ MTMD_API void mtmd_helper_log_set(ggml_log_callback log_callback, void * user_data);
27
+
28
+ // helper function to construct a mtmd_bitmap from a file
29
+ // it calls mtmd_helper_bitmap_init_from_buf() internally
30
+ // returns nullptr on failure
31
+ // this function is thread-safe
32
+ MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname);
33
+
34
+ // helper function to construct a mtmd_bitmap from a buffer containing a file
35
+ // supported formats:
36
+ // image: formats supported by stb_image: jpg, png, bmp, gif, etc.
37
+ // audio: formats supported by miniaudio: wav, mp3, flac
38
+ // note: audio files will be auto-detected based on magic bytes
39
+ // returns nullptr on failure
40
+ // this function is thread-safe
41
+ MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len);
42
+
43
+ // helper to count the total number of tokens from a list of chunks, useful to keep track of KV cache
44
+ MTMD_API size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks);
45
+
46
+ // helper to count the total position of tokens from a list of chunks, useful to keep track of n_past
47
+ // normally, n_pos is equal to n_tokens, but for M-RoPE it is different
48
+ MTMD_API llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks);
49
+
50
+ // helper function that automatically:
51
+ // 1. run llama_decode() on text chunks
52
+ // 2. run mtmd_encode() on image chunks, then mtmd_get_output_embd() and then llama_decode()
53
+ // if any of the mtmd_encode() or llama_decode() calls return non-zero, stop and forward the error
54
+ // otherwise, returns 0 on success
55
+ // this function is NOT thread-safe
56
+ MTMD_API int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
57
+ struct llama_context * lctx,
58
+ const mtmd_input_chunks * chunks,
59
+ llama_pos n_past,
60
+ llama_seq_id seq_id,
61
+ int32_t n_batch,
62
+ bool logits_last,
63
+ llama_pos * new_n_past);
64
+
65
+ // works like mtmd_helper_eval_chunks(), but only for a single chunk
66
+ // this function is NOT thread-safe
67
+ MTMD_API int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
68
+ struct llama_context * lctx,
69
+ const mtmd_input_chunk * chunk,
70
+ llama_pos n_past,
71
+ llama_seq_id seq_id,
72
+ int32_t n_batch,
73
+ bool logits_last,
74
+ llama_pos * new_n_past);
75
+
76
+ // helper function to decode an image whose embeddings have already been calculated
77
+ // this helper will handle batching and pre/post decoding setup (for ex. gemma 3 requires non-causal attention)
78
+ // returns 0 on success, -1 if the chunk is not a valid image/audio chunk, or the non-zero llama_decode() status on decode failure
79
+ MTMD_API int32_t mtmd_helper_decode_image_chunk(mtmd_context * ctx,
80
+ struct llama_context * lctx,
81
+ const mtmd_input_chunk * chunk,
82
+ float * encoded_embd,
83
+ llama_pos n_past,
84
+ llama_seq_id seq_id,
85
+ int32_t n_batch,
86
+ llama_pos * new_n_past);
87
+
88
+ #ifdef __cplusplus
89
+ } // extern "C"
90
+ #endif
91
+
92
+ //
93
+ // C++ wrappers
94
+ //
95
+
96
+ #endif