local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (626)
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,319 @@
1
+ #ifndef MTMD_H
2
+ #define MTMD_H
3
+
4
+ #include "ggml.h"
5
+ #include "llama.h"
6
+
7
+ #include <stddef.h>
8
+ #include <stdint.h>
9
+ #include <stdbool.h>
10
+
11
+ #ifdef __cplusplus
12
+ #include <string>
13
+ #include <vector>
14
+ #include <cinttypes>
15
+ #include <memory>
16
+ #endif
17
+
18
+ /**
19
+ * libmtmd: A library for multimodal support in llama.cpp.
20
+ *
21
+ * WARNING: This API is experimental and subject to many BREAKING CHANGES.
22
+ * Issues related to API usage may receive lower priority support.
23
+ *
24
+ * For the usage, see an example in mtmd-cli.cpp
25
+ *
26
+ * For contributors:
27
+ * - Make sure the C API is aligned with the libllama C API (as in llama.h)
28
+ * - Do not include model name (e.g., qwen, gemma) in the API, use generic terms instead
29
+ * - Keep the API minimal, do not expose internal details unless necessary
30
+ *
31
+ * IMPORTANT: The mtmd module does NOT accept pull requests that are fully or predominantly AI-generated.
32
+ * We encourage human contributors to ensure the quality and reliability of the codebase.
33
+ */
34
+
35
+ #ifdef LLAMA_SHARED
36
+ # if defined(_WIN32) && !defined(__MINGW32__)
37
+ # ifdef LLAMA_BUILD
38
+ # define MTMD_API __declspec(dllexport)
39
+ # else
40
+ # define MTMD_API __declspec(dllimport)
41
+ # endif
42
+ # else
43
+ # define MTMD_API __attribute__ ((visibility ("default")))
44
+ # endif
45
+ #else
46
+ # define MTMD_API
47
+ #endif
48
+
49
+ // deprecated marker, use mtmd_default_marker() instead
50
+ #define MTMD_DEFAULT_IMAGE_MARKER "<__image__>"
51
+
52
+ #ifdef __cplusplus
53
+ extern "C" {
54
+ #endif
55
+
56
+ enum mtmd_input_chunk_type {
57
+ MTMD_INPUT_CHUNK_TYPE_TEXT,
58
+ MTMD_INPUT_CHUNK_TYPE_IMAGE,
59
+ MTMD_INPUT_CHUNK_TYPE_AUDIO,
60
+ };
61
+
62
+ // opaque types
63
+ struct mtmd_context;
64
+ struct mtmd_bitmap;
65
+ struct mtmd_image_tokens;
66
+ struct mtmd_input_chunk;
67
+ struct mtmd_input_chunks;
68
+
69
+ struct mtmd_input_text {
70
+ const char * text;
71
+ bool add_special;
72
+ bool parse_special;
73
+ };
74
+
75
+ //
76
+ // C API
77
+ //
78
+
79
+ typedef struct mtmd_context mtmd_context;
80
+ typedef struct mtmd_bitmap mtmd_bitmap;
81
+ typedef struct mtmd_image_tokens mtmd_image_tokens;
82
+ typedef struct mtmd_input_chunk mtmd_input_chunk;
83
+ typedef struct mtmd_input_chunks mtmd_input_chunks;
84
+ typedef struct mtmd_input_text mtmd_input_text;
85
+
86
+ struct mtmd_context_params {
87
+ bool use_gpu;
88
+ bool print_timings;
89
+ int n_threads;
90
+ const char * image_marker; // deprecated, use media_marker instead
91
+ const char * media_marker;
92
+ enum llama_flash_attn_type flash_attn_type;
93
+ bool warmup; // whether to run a warmup encode pass after initialization
94
+
95
+ // limit number of image tokens, only for vision models with dynamic resolution
96
+ int image_min_tokens; // minimum number of tokens for image input (default: read from metadata)
97
+ int image_max_tokens; // maximum number of tokens for image input (default: read from metadata)
98
+
99
+ // callback function passed over to mtmd proper
100
+ ggml_backend_sched_eval_callback cb_eval;
101
+ void * cb_eval_user_data;
102
+ };
103
+
104
+ MTMD_API const char * mtmd_default_marker(void);
105
+
106
+ MTMD_API struct mtmd_context_params mtmd_context_params_default(void);
107
+
108
+ // initialize the mtmd context
109
+ // return nullptr on failure
110
+ MTMD_API mtmd_context * mtmd_init_from_file(const char * mmproj_fname,
111
+ const struct llama_model * text_model,
112
+ const struct mtmd_context_params ctx_params);
113
+
114
+ MTMD_API void mtmd_free(mtmd_context * ctx);
115
+
116
+ // whether we need to set non-causal mask before llama_decode
117
+ MTMD_API bool mtmd_decode_use_non_causal(mtmd_context * ctx);
118
+
119
+ // whether the current model uses M-RoPE for llama_decode
120
+ MTMD_API bool mtmd_decode_use_mrope(mtmd_context * ctx);
121
+
122
+ // whether the current model supports vision input
123
+ MTMD_API bool mtmd_support_vision(mtmd_context * ctx);
124
+
125
+ // whether the current model supports audio input
126
+ MTMD_API bool mtmd_support_audio(mtmd_context * ctx);
127
+
128
+ // get the audio sample rate in Hz (called "bitrate" in this API), for example 16000 for Whisper
129
+ // return -1 if audio is not supported
130
+ MTMD_API int mtmd_get_audio_bitrate(mtmd_context * ctx);
131
+
132
+ // mtmd_bitmap
133
+ //
134
+ // if bitmap is image:
135
+ // length of data must be nx * ny * 3
136
+ // the data is in RGBRGBRGB... format
137
+ // if bitmap is audio:
138
+ // length of data must be n_samples * sizeof(float)
139
+ // the data is in float format (PCM F32)
140
+ MTMD_API mtmd_bitmap * mtmd_bitmap_init (uint32_t nx, uint32_t ny, const unsigned char * data);
141
+ MTMD_API mtmd_bitmap * mtmd_bitmap_init_from_audio(size_t n_samples, const float * data);
142
+ MTMD_API uint32_t mtmd_bitmap_get_nx (const mtmd_bitmap * bitmap);
143
+ MTMD_API uint32_t mtmd_bitmap_get_ny (const mtmd_bitmap * bitmap);
144
+ MTMD_API const unsigned char * mtmd_bitmap_get_data (const mtmd_bitmap * bitmap);
145
+ MTMD_API size_t mtmd_bitmap_get_n_bytes(const mtmd_bitmap * bitmap);
146
+ MTMD_API bool mtmd_bitmap_is_audio (const mtmd_bitmap * bitmap);
147
+ MTMD_API void mtmd_bitmap_free (mtmd_bitmap * bitmap);
148
+ // bitmap ID is optional, but useful for KV cache tracking
149
+ // these getters/setters are dedicated functions, so you can for example calculate the hash of the image based on mtmd_bitmap_get_data()
150
+ MTMD_API const char * mtmd_bitmap_get_id(const mtmd_bitmap * bitmap);
151
+ MTMD_API void mtmd_bitmap_set_id(mtmd_bitmap * bitmap, const char * id);
152
+
153
+
154
+ // mtmd_input_chunks
155
+ //
156
+ // this is simply a list of mtmd_input_chunk
157
+ // the elements can only be populated via mtmd_tokenize()
158
+ MTMD_API mtmd_input_chunks * mtmd_input_chunks_init(void);
159
+ MTMD_API size_t mtmd_input_chunks_size(const mtmd_input_chunks * chunks);
160
+ MTMD_API const mtmd_input_chunk * mtmd_input_chunks_get (const mtmd_input_chunks * chunks, size_t idx);
161
+ MTMD_API void mtmd_input_chunks_free(mtmd_input_chunks * chunks);
162
+
163
+ // mtmd_input_chunk
164
+ //
165
+ // the instance will be constructed via mtmd_tokenize()
166
+ // it will be freed along with mtmd_input_chunks
167
+ MTMD_API enum mtmd_input_chunk_type mtmd_input_chunk_get_type (const mtmd_input_chunk * chunk);
168
+ MTMD_API const llama_token * mtmd_input_chunk_get_tokens_text (const mtmd_input_chunk * chunk, size_t * n_tokens_output);
169
+ MTMD_API const mtmd_image_tokens * mtmd_input_chunk_get_tokens_image(const mtmd_input_chunk * chunk);
170
+ MTMD_API size_t mtmd_input_chunk_get_n_tokens (const mtmd_input_chunk * chunk);
171
+ // returns nullptr for ID on text chunk
172
+ MTMD_API const char * mtmd_input_chunk_get_id (const mtmd_input_chunk * chunk);
173
+ // number of temporal positions (equal to max(t,h,w) for M-RoPE; equal to n_tokens otherwise)
174
+ MTMD_API llama_pos mtmd_input_chunk_get_n_pos (const mtmd_input_chunk * chunk);
175
+
176
+ // in case you want to use custom logic to handle the chunk (i.e. KV cache management)
177
+ // you can move the chunk ownership to your own code by copying it
178
+ // remember to free the chunk when you are done with it
179
+ MTMD_API mtmd_input_chunk * mtmd_input_chunk_copy(const mtmd_input_chunk * chunk);
180
+ MTMD_API void mtmd_input_chunk_free(mtmd_input_chunk * chunk);
181
+
182
+
183
+ // mtmd_image_tokens
184
+ //
185
+ // the instance will be constructed via mtmd_tokenize()
186
+ // it will be freed along with mtmd_input_chunk
187
+ MTMD_API size_t mtmd_image_tokens_get_n_tokens(const mtmd_image_tokens * image_tokens); // TODO: deprecate
188
+ MTMD_API size_t mtmd_image_tokens_get_nx (const mtmd_image_tokens * image_tokens);
189
+ MTMD_API size_t mtmd_image_tokens_get_ny (const mtmd_image_tokens * image_tokens);
190
+ MTMD_API const char * mtmd_image_tokens_get_id (const mtmd_image_tokens * image_tokens); // TODO: deprecate
191
+ // number of temporal positions (equal to max(t,h,w) for M-RoPE; equal to n_tokens otherwise)
192
+ MTMD_API llama_pos mtmd_image_tokens_get_n_pos (const mtmd_image_tokens * image_tokens); // TODO: deprecate
193
+
194
+ // tokenize an input text prompt and a list of bitmaps (images/audio)
195
+ // the prompt must contain the media marker (default: "<__media__>")
196
+ // the default marker is defined by mtmd_default_marker()
197
+ // the marker will be replaced with the image/audio chunk
198
+ // for example:
199
+ // "here is an image: <__media__>\ndescribe it in detail."
200
+ // this will give 3 chunks:
201
+ // 1. "here is an image: <start_of_image>"
202
+ // 2. (image/audio tokens)
203
+ // 3. "<end_of_image>\ndescribe it in detail."
204
+ // number of bitmaps must be equal to the number of markers in the prompt
205
+ // this function is thread-safe (shared ctx)
206
+ // return values:
207
+ // 0 on success
208
+ // 1 on number of bitmaps not matching the number of markers
209
+ // 2 on image preprocessing error
210
+ MTMD_API int32_t mtmd_tokenize(mtmd_context * ctx,
211
+ mtmd_input_chunks * output,
212
+ const mtmd_input_text * text,
213
+ const mtmd_bitmap ** bitmaps,
214
+ size_t n_bitmaps);
215
+
216
+ // returns 0 on success
217
+ // TODO: deprecate
218
+ MTMD_API int32_t mtmd_encode(mtmd_context * ctx,
219
+ const mtmd_image_tokens * image_tokens);
220
+
221
+ // returns 0 on success
222
+ MTMD_API int32_t mtmd_encode_chunk(mtmd_context * ctx,
223
+ const mtmd_input_chunk * chunk);
224
+
225
+ // get output embeddings from the last encode pass
226
+ // the reading size (in bytes) is equal to:
227
+ // llama_model_n_embd_inp(model) * mtmd_input_chunk_get_n_tokens(chunk) * sizeof(float)
228
+ MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx);
229
+
230
+ // Set callback for all future logging events.
231
+ // If this is not called, or NULL is supplied, everything is output on stderr.
232
+ MTMD_API void mtmd_log_set(ggml_log_callback log_callback, void * user_data);
233
+
234
+ /////////////////////////////////////////
235
+
236
+ // test function, to be used in test-mtmd-c-api.c
237
+ MTMD_API mtmd_input_chunks * mtmd_test_create_input_chunks(void);
238
+
239
+ #ifdef __cplusplus
240
+ } // extern "C"
241
+ #endif
242
+
243
+ //
244
+ // C++ wrappers
245
+ //
246
+
247
+ #ifdef __cplusplus
248
+
249
+ namespace mtmd {
250
+
251
+ struct mtmd_context_deleter {
252
+ void operator()(mtmd_context * val) { mtmd_free(val); }
253
+ };
254
+ using context_ptr = std::unique_ptr<mtmd_context, mtmd_context_deleter>;
255
+
256
+ struct mtmd_bitmap_deleter {
257
+ void operator()(mtmd_bitmap * val) { mtmd_bitmap_free(val); }
258
+ };
259
+ using bitmap_ptr = std::unique_ptr<mtmd_bitmap, mtmd_bitmap_deleter>;
260
+
261
+ struct mtmd_input_chunks_deleter {
262
+ void operator()(mtmd_input_chunks * val) { mtmd_input_chunks_free(val); }
263
+ };
264
+ using input_chunks_ptr = std::unique_ptr<mtmd_input_chunks, mtmd_input_chunks_deleter>;
265
+
266
+ struct mtmd_input_chunk_deleter {
267
+ void operator()(mtmd_input_chunk * val) { mtmd_input_chunk_free(val); }
268
+ };
269
+ using input_chunk_ptr = std::unique_ptr<mtmd_input_chunk, mtmd_input_chunk_deleter>;
270
+
271
+ struct bitmap { // owning wrapper: the held mtmd_bitmap is released with mtmd_bitmap_free (see bitmap_ptr)
272
+ bitmap_ptr ptr;
273
+ bitmap() : ptr(nullptr) {}
274
+ bitmap(mtmd_bitmap * bitmap) : ptr(bitmap) {} // takes ownership of the given bitmap
275
+ bitmap(bitmap && other) noexcept : ptr(std::move(other.ptr)) {}
276
+ bitmap(uint32_t nx, uint32_t ny, const unsigned char * data) { // image bitmap: data holds nx * ny * 3 bytes in RGBRGB... order
277
+ ptr.reset(mtmd_bitmap_init(nx, ny, data));
278
+ }
279
+ ~bitmap() = default;
280
+ uint32_t nx() const { return mtmd_bitmap_get_nx(ptr.get()); }
281
+ uint32_t ny() const { return mtmd_bitmap_get_ny(ptr.get()); }
282
+ const unsigned char * data() const { return mtmd_bitmap_get_data(ptr.get()); }
283
+ size_t n_bytes() const { return mtmd_bitmap_get_n_bytes(ptr.get()); }
284
+ std::string id() const { const char * id = mtmd_bitmap_get_id(ptr.get()); return id ? std::string(id) : std::string(); } // the ID is optional: a null ID yields an empty string instead of undefined behaviour
285
+ void set_id(const char * id) const { mtmd_bitmap_set_id(ptr.get(), id); }
286
+ };
287
+
288
+ struct bitmaps {
289
+ std::vector<bitmap> entries;
290
+ ~bitmaps() = default;
291
+ // return list of pointers to mtmd_bitmap
292
+ // example:
293
+ // auto bitmaps_c_ptr = bitmaps.c_ptr();
294
+ // int32_t res = mtmd_tokenize(... bitmaps_c_ptr.data(), bitmaps_c_ptr.size());
295
+ std::vector<const mtmd_bitmap *> c_ptr() {
296
+ std::vector<const mtmd_bitmap *> res(entries.size());
297
+ for (size_t i = 0; i < entries.size(); i++) {
298
+ res[i] = entries[i].ptr.get();
299
+ }
300
+ return res;
301
+ }
302
+ };
303
+
304
+ struct input_chunks {
305
+ input_chunks_ptr ptr;
306
+ input_chunks() = default;
307
+ input_chunks(mtmd_input_chunks * chunks) : ptr(chunks) {}
308
+ ~input_chunks() = default;
309
+ size_t size() const { return mtmd_input_chunks_size(ptr.get()); }
310
+ const mtmd_input_chunk * operator[](size_t idx) const {
311
+ return mtmd_input_chunks_get(ptr.get(), idx);
312
+ }
313
+ };
314
+
315
+ } // namespace mtmd
316
+
317
+ #endif
318
+
319
+ #endif
@@ -0,0 +1,5 @@
1
+ -r ../../requirements/requirements-convert_legacy_llama.txt
2
+ --extra-index-url https://download.pytorch.org/whl/cpu
3
+ pillow~=11.3.0
4
+ torch~=2.6.0
5
+ torchvision~=0.21.0
Binary file
Binary file
@@ -0,0 +1,192 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # make sure we are in the right directory (paths are quoted so that checkouts located in directories with spaces work)
4
+ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
5
+ cd "$SCRIPT_DIR"
6
+
7
+ #export LLAMA_CACHE="$SCRIPT_DIR/tmp"
8
+
9
+ set -eux # -e: stop on the first failing command, -u: treat unset variables as errors, -x: trace commands
10
+
11
+ mkdir -p "$SCRIPT_DIR/output" # per-model logs are written here
12
+
13
+ PROJ_ROOT="$SCRIPT_DIR/../.."
14
+ cd "$PROJ_ROOT"
15
+
16
+ # Check if the first argument is "big", then run test with big models
17
+ # This is useful if we're running the script on a larger machine, so we can test the big models
18
+ RUN_BIG_TESTS=false
19
+ if [ "${1:-}" = "big" ]; then
20
+ RUN_BIG_TESTS=true
21
+ echo "Include BIG models..."
22
+ fi
23
+
24
+ RUN_HUGE_TESTS=false
25
+ if [ "${1:-}" = "huge" ]; then
26
+ RUN_HUGE_TESTS=true
27
+ RUN_BIG_TESTS=true
28
+ echo "Include BIG and HUGE models..."
29
+ fi
30
+
31
+ # Check if the first or second argument is "flash_off", then disable flash attention (it is enabled by default)
32
+ # This is useful to test that running with flash attention off works correctly
33
+ FLASH_ATTN="on"
34
+ if [ "${2:-}" = "flash_off" ] || [ "${1:-}" = "flash_off" ]; then
35
+ FLASH_ATTN="off"
36
+ echo "Flash attention disabled..."
37
+ fi
38
+
39
+ ###############
40
+
41
+ arr_prefix=()
42
+ arr_hf=()
43
+ arr_extra_args=()
44
+ arr_file=()
45
+
46
+ add_test_vision() {
47
+ local hf=$1
48
+ shift
49
+ local extra_args=""
50
+ if [ $# -gt 0 ]; then
51
+ extra_args=$(printf " %q" "$@")
52
+ fi
53
+ arr_prefix+=("[vision]")
54
+ arr_hf+=("$hf")
55
+ arr_extra_args+=("$extra_args")
56
+ arr_file+=("test-1.jpeg")
57
+ }
58
+
59
+ add_test_audio() {
60
+ local hf=$1
61
+ shift
62
+ local extra_args=""
63
+ if [ $# -gt 0 ]; then
64
+ extra_args=$(printf " %q" "$@")
65
+ fi
66
+ arr_prefix+=("[audio] ")
67
+ arr_hf+=("$hf")
68
+ arr_extra_args+=("$extra_args")
69
+ arr_file+=("test-2.mp3")
70
+ }
71
+
72
+ add_test_vision "ggml-org/SmolVLM-500M-Instruct-GGUF:Q8_0"
73
+ add_test_vision "ggml-org/SmolVLM2-2.2B-Instruct-GGUF:Q4_K_M"
74
+ add_test_vision "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF:Q8_0"
75
+ add_test_vision "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M"
76
+ add_test_vision "THUDM/glm-edge-v-5b-gguf:Q4_K_M" -p "name of the newspaper?<__media__>"
77
+ add_test_vision "second-state/Llava-v1.5-7B-GGUF:Q2_K" --chat-template vicuna
78
+ add_test_vision "cjpais/llava-1.6-mistral-7b-gguf:Q3_K_M" --chat-template vicuna
79
+ add_test_vision "ibm-research/granite-vision-3.2-2b-GGUF:Q4_K_M"
80
+ add_test_vision "second-state/MiniCPM-Llama3-V-2_5-GGUF:Q2_K" # model from openbmb is corrupted
81
+ add_test_vision "openbmb/MiniCPM-V-2_6-gguf:Q2_K"
82
+ add_test_vision "openbmb/MiniCPM-o-2_6-gguf:Q4_0"
83
+ add_test_vision "bartowski/Qwen2-VL-2B-Instruct-GGUF:Q4_K_M"
84
+ add_test_vision "ggml-org/Qwen2.5-VL-3B-Instruct-GGUF:Q4_K_M"
85
+ add_test_vision "ggml-org/InternVL2_5-1B-GGUF:Q8_0"
86
+ add_test_vision "ggml-org/InternVL3-1B-Instruct-GGUF:Q8_0"
87
+ add_test_vision "ggml-org/Qwen2.5-Omni-3B-GGUF:Q4_K_M"
88
+ add_test_vision "ggml-org/LFM2-VL-450M-GGUF:Q8_0"
89
+ add_test_vision "ggml-org/granite-docling-258M-GGUF:Q8_0"
90
+ add_test_vision "ggml-org/LightOnOCR-1B-1025-GGUF:Q8_0"
91
+
92
+ add_test_audio "ggml-org/ultravox-v0_5-llama-3_2-1b-GGUF:Q8_0"
93
+ add_test_audio "ggml-org/Qwen2.5-Omni-3B-GGUF:Q4_K_M"
94
+ add_test_audio "ggml-org/Voxtral-Mini-3B-2507-GGUF:Q4_K_M"
95
+ add_test_audio "ggml-org/LFM2-Audio-1.5B-GGUF:Q8_0"
96
+
97
+ # to test the big models, run: ./tests.sh big
98
+ if [ "$RUN_BIG_TESTS" = true ]; then
99
+ add_test_vision "ggml-org/pixtral-12b-GGUF:Q4_K_M"
100
+ add_test_vision "ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF" --chat-template mistral-v7
101
+ add_test_vision "ggml-org/Qwen2-VL-2B-Instruct-GGUF:Q4_K_M"
102
+ add_test_vision "ggml-org/Qwen2-VL-7B-Instruct-GGUF:Q4_K_M"
103
+ add_test_vision "ggml-org/Qwen2.5-VL-3B-Instruct-GGUF:Q4_K_M"
104
+ add_test_vision "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M"
105
+ add_test_vision "ggml-org/Qwen3-VL-2B-Instruct-GGUF:Q8_0"
106
+ add_test_vision "ggml-org/InternVL3-8B-Instruct-GGUF:Q4_K_M"
107
+ add_test_vision "ggml-org/InternVL3-14B-Instruct-GGUF:Q4_K_M"
108
+ add_test_vision "ggml-org/Qwen2.5-Omni-7B-GGUF:Q4_K_M"
109
+ # add_test_vision "ggml-org/Qwen2.5-VL-32B-Instruct-GGUF:Q4_K_M" # does not work on my mac M3 Ultra
110
+ # add_test_vision "ggml-org/Kimi-VL-A3B-Thinking-2506-GGUF:Q4_K_M" # not always working
111
+
112
+ add_test_audio "ggml-org/ultravox-v0_5-llama-3_1-8b-GGUF:Q4_K_M"
113
+ add_test_audio "ggml-org/Qwen2.5-Omni-7B-GGUF:Q4_K_M"
114
+ fi
115
+
116
+ # to test the huge models, run: ./tests.sh huge
117
+ # this will run both the big and huge models
118
+ # huge models are > 32B parameters
119
+ if [ "$RUN_HUGE_TESTS" = true ]; then
120
+ add_test_vision "ggml-org/Qwen2.5-VL-72B-Instruct-GGUF:Q4_K_M"
121
+ add_test_vision "ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF:IQ1_S"
122
+ fi
123
+
124
+ # these models always give the wrong answer, not sure why
125
+ # add_test_vision "ggml-org/SmolVLM-Instruct-GGUF:Q4_K_M"
126
+ # add_test_vision "ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0"
127
+ # add_test_vision "ggml-org/SmolVLM2-256M-Video-Instruct-GGUF:Q8_0"
128
+
129
+ # these models have a broken chat template, not usable
130
+ # add_test_vision "cmp-nct/Yi-VL-6B-GGUF:Q5_K"
131
+ # add_test_vision "guinmoon/MobileVLM-3B-GGUF:Q4_K_M" "deepseek"
132
+
133
+ ###############
134
+
135
+ cmake --build build -j --target llama-mtmd-cli
136
+
137
+ arr_res=()
138
+
139
+ for i in "${!arr_hf[@]}"; do # run every registered model once and record an OK/FAIL line for it
140
+ bin="llama-mtmd-cli"
141
+ prefix="${arr_prefix[$i]}"
142
+ hf="${arr_hf[$i]}"
143
+ extra_args="${arr_extra_args[$i]}"
144
+ inp_file="${arr_file[$i]}"
145
+
146
+ echo "Running test with binary: $bin and HF model: $hf"
147
+ echo ""
148
+ echo ""
149
+
150
+ cmd="$(printf %q "$PROJ_ROOT/build/bin/$bin") \
151
+ -hf $(printf %q "$hf") \
152
+ --image $(printf %q "$SCRIPT_DIR/$inp_file") \
153
+ --temp 0 -n 128 \
154
+ --flash-attn $(printf %q "$FLASH_ATTN") \
155
+ ${extra_args}"
156
+
157
+ # if extra_args has no standalone -p option, we add a default prompt (a plain substring test would also match e.g. --top-p)
158
+ if ! [[ " $extra_args " == *" -p "* ]]; then
159
+ cmd+=" -p \"what is the publisher name of the newspaper?\""
160
+ fi
161
+
162
+ output=$(eval "$cmd" 2>&1 | tee /dev/tty) # show the output on the terminal while capturing it
163
+
164
+ echo "$output" > "$SCRIPT_DIR/output/$bin-$(echo "$hf" | tr '/' '-').log" # quoted: an unquoted target fails with "ambiguous redirect" when the path has spaces
165
+
166
+ # either contains "new york" or both "men" and "walk"
167
+ if echo "$output" | grep -iq "new york" \
168
+ || (echo "$output" | grep -iq "men" && echo "$output" | grep -iq "walk")
169
+ then
170
+ result="$prefix \033[32mOK\033[0m: $hf"
171
+ else
172
+ result="$prefix \033[31mFAIL\033[0m: $hf"
173
+ fi
174
+ echo -e "$result"
175
+ arr_res+=("$result")
176
+
177
+ echo ""
178
+ echo ""
179
+ echo ""
180
+ echo "#################################################"
181
+ echo "#################################################"
182
+ echo ""
183
+ echo ""
184
+ done
185
+
186
+ set +x
187
+
188
+ for i in "${!arr_res[@]}"; do
189
+ echo -e "${arr_res[$i]}"
190
+ done
191
+ echo ""
192
+ echo "Output logs are saved in $SCRIPT_DIR/output"
@@ -0,0 +1,169 @@
1
+ llama_add_compile_flags()
2
+
3
+ #
4
+ # libraries
5
+ #
6
+
7
+ # llama
8
+
9
+ add_library(llama
10
+ ../include/llama.h
11
+ llama.cpp
12
+ llama-adapter.cpp
13
+ llama-arch.cpp
14
+ llama-batch.cpp
15
+ llama-chat.cpp
16
+ llama-context.cpp
17
+ llama-cparams.cpp
18
+ llama-grammar.cpp
19
+ llama-graph.cpp
20
+ llama-hparams.cpp
21
+ llama-impl.cpp
22
+ llama-io.cpp
23
+ llama-kv-cache.cpp
24
+ llama-kv-cache-iswa.cpp
25
+ llama-memory.cpp
26
+ llama-memory-hybrid.cpp
27
+ llama-memory-hybrid-iswa.cpp
28
+ llama-memory-recurrent.cpp
29
+ llama-mmap.cpp
30
+ llama-model-loader.cpp
31
+ llama-model-saver.cpp
32
+ llama-model.cpp
33
+ llama-quant.cpp
34
+ llama-sampler.cpp
35
+ llama-vocab.cpp
36
+ unicode-data.cpp
37
+ unicode.cpp
38
+ unicode.h
39
+ models/afmoe.cpp
40
+ models/apertus.cpp
41
+ models/arcee.cpp
42
+ models/arctic.cpp
43
+ models/arwkv7.cpp
44
+ models/baichuan.cpp
45
+ models/bailingmoe.cpp
46
+ models/bailingmoe2.cpp
47
+ models/bert.cpp
48
+ models/bitnet.cpp
49
+ models/bloom.cpp
50
+ models/chameleon.cpp
51
+ models/chatglm.cpp
52
+ models/codeshell.cpp
53
+ models/cogvlm.cpp
54
+ models/cohere2-iswa.cpp
55
+ models/command-r.cpp
56
+ models/dbrx.cpp
57
+ models/deci.cpp
58
+ models/deepseek.cpp
59
+ models/deepseek2.cpp
60
+ models/delta-net-base.cpp
61
+ models/dots1.cpp
62
+ models/dream.cpp
63
+ models/ernie4-5-moe.cpp
64
+ models/ernie4-5.cpp
65
+ models/eurobert.cpp
66
+ models/exaone-moe.cpp
67
+ models/exaone.cpp
68
+ models/exaone4.cpp
69
+ models/falcon-h1.cpp
70
+ models/falcon.cpp
71
+ models/gemma-embedding.cpp
72
+ models/gemma.cpp
73
+ models/gemma2-iswa.cpp
74
+ models/gemma3.cpp
75
+ models/gemma3n-iswa.cpp
76
+ models/glm4-moe.cpp
77
+ models/glm4.cpp
78
+ models/gpt2.cpp
79
+ models/gptneox.cpp
80
+ models/granite-hybrid.cpp
81
+ models/granite.cpp
82
+ models/grok.cpp
83
+ models/grovemoe.cpp
84
+ models/hunyuan-dense.cpp
85
+ models/hunyuan-moe.cpp
86
+ models/internlm2.cpp
87
+ models/jais.cpp
88
+ models/jais2.cpp
89
+ models/jamba.cpp
90
+ models/kimi-linear.cpp
91
+ models/lfm2.cpp
92
+ models/llada-moe.cpp
93
+ models/llada.cpp
94
+ models/llama-iswa.cpp
95
+ models/llama.cpp
96
+ models/maincoder.cpp
97
+ models/mamba-base.cpp
98
+ models/mamba.cpp
99
+ models/mimo2-iswa.cpp
100
+ models/minicpm3.cpp
101
+ models/minimax-m2.cpp
102
+ models/mistral3.cpp
103
+ models/modern-bert.cpp
104
+ models/mpt.cpp
105
+ models/nemotron-h.cpp
106
+ models/nemotron.cpp
107
+ models/neo-bert.cpp
108
+ models/olmo.cpp
109
+ models/olmo2.cpp
110
+ models/olmoe.cpp
111
+ models/openai-moe-iswa.cpp
112
+ models/openelm.cpp
113
+ models/orion.cpp
114
+ models/paddleocr.cpp
115
+ models/pangu-embedded.cpp
116
+ models/phi2.cpp
117
+ models/phi3.cpp
118
+ models/plamo.cpp
119
+ models/plamo2.cpp
120
+ models/plamo3.cpp
121
+ models/plm.cpp
122
+ models/qwen.cpp
123
+ models/qwen2.cpp
124
+ models/qwen2moe.cpp
125
+ models/qwen2vl.cpp
126
+ models/qwen3.cpp
127
+ models/qwen35.cpp
128
+ models/qwen35moe.cpp
129
+ models/qwen3moe.cpp
130
+ models/qwen3next.cpp
131
+ models/qwen3vl-moe.cpp
132
+ models/qwen3vl.cpp
133
+ models/refact.cpp
134
+ models/rnd1.cpp
135
+ models/rwkv6-base.cpp
136
+ models/rwkv6.cpp
137
+ models/rwkv6qwen2.cpp
138
+ models/rwkv7-base.cpp
139
+ models/rwkv7.cpp
140
+ models/seed-oss.cpp
141
+ models/smallthinker.cpp
142
+ models/smollm3.cpp
143
+ models/stablelm.cpp
144
+ models/starcoder.cpp
145
+ models/starcoder2.cpp
146
+ models/step35-iswa.cpp
147
+ models/t5-dec.cpp
148
+ models/t5-enc.cpp
149
+ models/wavtokenizer-dec.cpp
150
+ models/xverse.cpp
151
+ )
152
+
153
+ set_target_properties(llama PROPERTIES
154
+ VERSION ${LLAMA_INSTALL_VERSION}
155
+ SOVERSION 0
156
+ MACHO_CURRENT_VERSION 0 # keep macOS linker from seeing oversized version number
157
+ )
158
+
159
+ target_include_directories(llama PRIVATE .) # this directory is an include path only while compiling llama itself
160
+ target_include_directories(llama PUBLIC ../include) # ../include is also propagated to every target that links llama
161
+ target_compile_features (llama PRIVATE cxx_std_17) # don't bump
162
+
163
+ target_link_libraries(llama PUBLIC ggml) # PUBLIC: ggml is part of llama's link interface, so consumers link it too
164
+
165
+ if (BUILD_SHARED_LIBS) # extra settings that apply to shared-library builds only
166
+ set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
167
+ target_compile_definitions(llama PRIVATE LLAMA_BUILD) # PRIVATE: defined only while compiling llama's own sources
168
+ target_compile_definitions(llama PUBLIC LLAMA_SHARED) # PUBLIC: defined for llama and for every target that links it
169
+ endif()