cui-llama.rn 1.7.3 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
  4. package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
@@ -4,6 +4,8 @@
4
4
  #include <stddef.h>
5
5
  #include <stdint.h>
6
6
 
7
+ // !!! Internal header, to be used by mtmd only !!!
8
+
7
9
  struct clip_ctx;
8
10
 
9
11
  struct clip_image_size {
@@ -15,12 +17,22 @@ struct clip_image_f32;
15
17
  struct clip_image_u8_batch;
16
18
  struct clip_image_f32_batch;
17
19
 
20
+ enum clip_modality {
21
+ CLIP_MODALITY_VISION,
22
+ CLIP_MODALITY_AUDIO,
23
+ };
24
+
18
25
  struct clip_context_params {
19
26
  bool use_gpu;
20
27
  enum lm_ggml_log_level verbosity;
21
28
  };
22
29
 
23
- struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params);
30
+ struct clip_init_result {
31
+ struct clip_ctx * ctx_v; // vision context
32
+ struct clip_ctx * ctx_a; // audio context
33
+ };
34
+
35
+ struct clip_init_result clip_init(const char * fname, struct clip_context_params ctx_params);
24
36
 
25
37
  void clip_free(struct clip_ctx * ctx);
26
38
 
@@ -34,9 +46,6 @@ int32_t clip_get_hidden_size(const struct clip_ctx * ctx);
34
46
  // TODO: should be enum, not string
35
47
  const char * clip_patch_merge_type(const struct clip_ctx * ctx);
36
48
 
37
- const int32_t * clip_image_grid(const struct clip_ctx * ctx);
38
- size_t get_clip_image_grid_size(const struct clip_ctx * ctx);
39
-
40
49
  int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * img);
41
50
 
42
51
  // for M-RoPE, this will be the number of token positions in X and Y directions
@@ -99,3 +108,4 @@ void clip_image_f32_batch_add_mel(struct clip_image_f32_batch * batch, int n_mel
99
108
 
100
109
  bool clip_has_vision_encoder(const struct clip_ctx * ctx);
101
110
  bool clip_has_audio_encoder(const struct clip_ctx * ctx);
111
+ bool clip_has_whisper_encoder(const struct clip_ctx * ctx);
@@ -1,28 +1,5 @@
1
- // fix problem with std::min and std::max
2
- #if defined(_WIN32)
3
- #define WIN32_LEAN_AND_MEAN
4
- #ifndef NOMINMAX
5
- # define NOMINMAX
6
- #endif
7
- #include <windows.h>
8
- #endif
9
-
10
1
  #include "mtmd-audio.h"
11
2
 
12
- //#define MTMD_AUDIO_DEBUG
13
-
14
- #define MINIAUDIO_IMPLEMENTATION
15
- #ifndef MTMD_AUDIO_DEBUG
16
- # define MA_NO_ENCODING
17
- #endif
18
- #define MA_NO_DEVICE_IO
19
- #define MA_NO_RESOURCE_MANAGER
20
- #define MA_NO_NODE_GRAPH
21
- #define MA_NO_ENGINE
22
- #define MA_NO_GENERATION
23
- #define MA_API static
24
- #include "miniaudio.h"
25
-
26
3
  #define _USE_MATH_DEFINES // for M_PI
27
4
  #include <cmath>
28
5
  #include <cstdint>
@@ -359,69 +336,6 @@ bool preprocess_audio(
359
336
  } // namespace whisper_preprocessor
360
337
 
361
338
 
362
- namespace audio_helpers {
363
-
364
- bool is_audio_file(const char * buf, size_t len) {
365
- if (len < 12) {
366
- return false;
367
- }
368
-
369
- // RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
370
- // WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
371
- bool is_wav = memcmp(buf, "RIFF", 4) == 0 && memcmp(buf + 8, "WAVE", 4) == 0;
372
- bool is_mp3 = len >= 3 && (
373
- memcmp(buf, "ID3", 3) == 0 ||
374
- // Check for MPEG sync word (simplified check)
375
- ((unsigned char)buf[0] == 0xFF && ((unsigned char)buf[1] & 0xE0) == 0xE0)
376
- );
377
- bool is_flac = memcmp(buf, "fLaC", 4) == 0;
378
-
379
- return is_wav || is_mp3 || is_flac;
380
- }
381
-
382
- // returns true if the buffer is a valid audio file
383
- bool decode_audio_from_buf(const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float> & pcmf32_mono) {
384
- ma_result result;
385
- const int channels = 1;
386
- ma_decoder_config decoder_config = ma_decoder_config_init(ma_format_f32, channels, target_sampler_rate);
387
- ma_decoder decoder;
388
-
389
- result = ma_decoder_init_memory(buf_in, len, &decoder_config, &decoder);
390
- if (result != MA_SUCCESS) {
391
- return false;
392
- }
393
-
394
- ma_uint64 frame_count;
395
- ma_uint64 frames_read;
396
- result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count);
397
- if (result != MA_SUCCESS) {
398
- ma_decoder_uninit(&decoder);
399
- return false;
400
- }
401
-
402
- pcmf32_mono.resize(frame_count);
403
- result = ma_decoder_read_pcm_frames(&decoder, pcmf32_mono.data(), frame_count, &frames_read);
404
- if (result != MA_SUCCESS) {
405
- ma_decoder_uninit(&decoder);
406
- return false;
407
- }
408
-
409
- #ifdef MTMD_AUDIO_DEBUG
410
- // save audio to wav file
411
- ma_encoder_config config = ma_encoder_config_init(ma_encoding_format_wav, ma_format_f32, 1, target_sampler_rate);
412
- ma_encoder encoder;
413
- ma_encoder_init_file("output.wav", &config, &encoder);
414
- ma_encoder_write_pcm_frames(&encoder, pcmf32_mono.data(), pcmf32_mono.size(), &frames_read);
415
- ma_encoder_uninit(&encoder);
416
- #endif
417
-
418
- ma_decoder_uninit(&decoder);
419
- return true;
420
- }
421
-
422
- } // namespace wav_utils
423
-
424
-
425
339
  // precalculated mel filter banks
426
340
  // values are multiplied by 1000.0 to save space, and will be divided by 1000.0 in the end of the function
427
341
  //
@@ -32,7 +32,7 @@ struct whisper_filters {
32
32
  std::vector<float> data;
33
33
  };
34
34
 
35
- extern bool preprocess_audio(
35
+ bool preprocess_audio(
36
36
  const float * samples,
37
37
  size_t n_samples,
38
38
  const whisper_filters & filters,
@@ -40,23 +40,8 @@ extern bool preprocess_audio(
40
40
 
41
41
  } // namespace whisper_preprocessor
42
42
 
43
-
44
- // TODO @ngxson : move this helper to mtmd-helpers.cpp
45
- namespace audio_helpers {
46
-
47
- extern bool is_audio_file(const char * buf, size_t len);
48
-
49
- extern bool decode_audio_from_buf(
50
- const unsigned char * buf_in,
51
- size_t len,
52
- int target_sampler_rate,
53
- std::vector<float> & pcmf32_mono);
54
-
55
- } // namespace audio_helpers
56
-
57
-
58
43
  namespace whisper_precalc_filters {
59
44
 
60
- extern whisper_preprocessor::whisper_filters get_128_bins();
45
+ whisper_preprocessor::whisper_filters get_128_bins();
61
46
 
62
47
  } // namespace whisper_precalc_filters
@@ -1,10 +1,37 @@
1
+ // fix problem with std::min and std::max
2
+ #if defined(_WIN32)
3
+ #define WIN32_LEAN_AND_MEAN
4
+ #ifndef NOMINMAX
5
+ # define NOMINMAX
6
+ #endif
7
+ #include <windows.h>
8
+ #endif
9
+
1
10
  #include "mtmd.h"
11
+ #include "mtmd-helper.h"
2
12
  #include "llama.h"
3
13
 
4
14
  #include <algorithm>
5
15
  #include <cinttypes>
6
16
  #include <vector>
7
17
 
18
+ //#define MTMD_AUDIO_DEBUG
19
+
20
+ #define MINIAUDIO_IMPLEMENTATION
21
+ #ifndef MTMD_AUDIO_DEBUG
22
+ # define MA_NO_ENCODING
23
+ #endif
24
+ #define MA_NO_DEVICE_IO
25
+ #define MA_NO_RESOURCE_MANAGER
26
+ #define MA_NO_NODE_GRAPH
27
+ #define MA_NO_ENGINE
28
+ #define MA_NO_GENERATION
29
+ #define MA_API static
30
+ #include "miniaudio/miniaudio.h"
31
+
32
+ #define STB_IMAGE_IMPLEMENTATION
33
+ #include "stb/stb_image.h"
34
+
8
35
  #define LOG_INF(...) fprintf(stdout, __VA_ARGS__)
9
36
  #define LOG_ERR(...) fprintf(stderr, __VA_ARGS__)
10
37
 
@@ -66,7 +93,8 @@ struct decode_embd_batch {
66
93
  }
67
94
  }
68
95
 
69
- void set_position_mrope(llama_pos pos_0, int nx, int ny, llama_seq_id seq_id) {
96
+ // M-RoPE for image
97
+ void set_position_mrope_2d(llama_pos pos_0, int nx, int ny, llama_seq_id seq_id) {
70
98
  LM_GGML_ASSERT(n_pos_per_embd == 4);
71
99
  seq_id_0[0] = seq_id;
72
100
  for (int y = 0; y < ny; y++) {
@@ -85,6 +113,23 @@ struct decode_embd_batch {
85
113
  }
86
114
  }
87
115
 
116
+ // M-RoPE for audio
117
+ void set_position_mrope_1d(llama_pos pos_0, llama_seq_id seq_id) {
118
+ LM_GGML_ASSERT(n_pos_per_embd == 4);
119
+ seq_id_0[0] = seq_id;
120
+ for (int i = 0; i < batch.n_tokens; i++) {
121
+ pos[i ] = pos_0 + i;
122
+ pos[i + batch.n_tokens ] = pos_0 + i;
123
+ pos[i + batch.n_tokens * 2] = pos_0 + i;
124
+ pos[i + batch.n_tokens * 3] = 0; // last pos dim is unused
125
+ }
126
+ for (int i = 0; i < batch.n_tokens; i++) {
127
+ batch.n_seq_id[i] = 1;
128
+ batch.seq_id [i] = seq_id_0.data();
129
+ batch.logits [i] = false;
130
+ }
131
+ }
132
+
88
133
  llama_batch get_view(int offset, int n_tokens) {
89
134
  llama_pos * pos_ptr;
90
135
  pos_view.clear();
@@ -146,18 +191,20 @@ int32_t mtmd_helper_decode_image_chunk(
146
191
  decode_embd_batch batch_embd(encoded_embd, n_tokens, n_pos_per_embd, n_mmproj_embd);
147
192
 
148
193
  if (mtmd_decode_use_mrope(ctx)) {
149
- const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
150
- if (chunk_type != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
151
- LOG_ERR("failed to decode chunk: M-RoPE only accepts image chunk\n");
152
- return -1;
153
- }
154
- if (!image_tokens) {
155
- LOG_ERR("failed to decode chunk: image tokens are null\n");
156
- return -1;
194
+ if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
195
+ const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
196
+ if (!image_tokens) {
197
+ LOG_ERR("failed to decode chunk: image tokens are null\n");
198
+ return -1;
199
+ }
200
+ const int nx = mtmd_image_tokens_get_nx(image_tokens);
201
+ const int ny = mtmd_image_tokens_get_ny(image_tokens);
202
+ batch_embd.set_position_mrope_2d(n_past, nx, ny, seq_id);
203
+ } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
204
+ batch_embd.set_position_mrope_1d(n_past, seq_id);
205
+ } else {
206
+ LM_GGML_ABORT("invalid chunk type for M-RoPE");
157
207
  }
158
- const int nx = mtmd_image_tokens_get_nx(image_tokens);
159
- const int ny = mtmd_image_tokens_get_ny(image_tokens);
160
- batch_embd.set_position_mrope(n_past, nx, ny, seq_id);
161
208
  } else {
162
209
  batch_embd.set_position_normal(n_past, seq_id);
163
210
  }
@@ -264,6 +311,7 @@ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
264
311
  LM_GGML_ABORT("chunk type not supported");
265
312
  }
266
313
 
314
+ llama_batch_free(text_batch);
267
315
  return 0;
268
316
  }
269
317
 
@@ -295,3 +343,118 @@ int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
295
343
 
296
344
  return 0;
297
345
  }
346
+
347
+ namespace audio_helpers {
348
+
349
// Sniffs the leading magic bytes of `buf` and reports whether they look like
// one of the audio containers handled here (WAV, MP3 or FLAC).
//
// buf : start of the file contents
// len : number of readable bytes in buf
//
// Buffers shorter than 12 bytes are always rejected, because the WAV check
// needs to look at bytes 8..11 of the RIFF header.
static bool is_audio_file(const char * buf, size_t len) {
    if (len < 12) {
        return false;
    }

    const unsigned char * bytes = (const unsigned char *) buf;

    // WAV: "RIFF" <4-byte chunk size> "WAVE"
    // RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
    // WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
    if (memcmp(buf, "RIFF", 4) == 0 && memcmp(buf + 8, "WAVE", 4) == 0) {
        return true;
    }

    // FLAC stream marker
    if (memcmp(buf, "fLaC", 4) == 0) {
        return true;
    }

    // MP3 that starts with an ID3 tag
    if (memcmp(buf, "ID3", 3) == 0) {
        return true;
    }

    // MP3 without a tag: simplified MPEG sync word check
    // (first byte 0xFF, top three bits of the second byte set)
    return bytes[0] == 0xFF && (bytes[1] & 0xE0) == 0xE0;
}
366
+
367
+ // returns true if the buffer is a valid audio file
368
+ static bool decode_audio_from_buf(const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float> & pcmf32_mono) {
369
+ ma_result result;
370
+ const int channels = 1;
371
+ ma_decoder_config decoder_config = ma_decoder_config_init(ma_format_f32, channels, target_sampler_rate);
372
+ ma_decoder decoder;
373
+
374
+ result = ma_decoder_init_memory(buf_in, len, &decoder_config, &decoder);
375
+ if (result != MA_SUCCESS) {
376
+ return false;
377
+ }
378
+
379
+ ma_uint64 frame_count;
380
+ ma_uint64 frames_read;
381
+ result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count);
382
+ if (result != MA_SUCCESS) {
383
+ ma_decoder_uninit(&decoder);
384
+ return false;
385
+ }
386
+
387
+ pcmf32_mono.resize(frame_count);
388
+ result = ma_decoder_read_pcm_frames(&decoder, pcmf32_mono.data(), frame_count, &frames_read);
389
+ if (result != MA_SUCCESS) {
390
+ ma_decoder_uninit(&decoder);
391
+ return false;
392
+ }
393
+
394
+ #ifdef MTMD_AUDIO_DEBUG
395
+ // save audio to wav file
396
+ ma_encoder_config config = ma_encoder_config_init(ma_encoding_format_wav, ma_format_f32, 1, target_sampler_rate);
397
+ ma_encoder encoder;
398
+ ma_encoder_init_file("output.wav", &config, &encoder);
399
+ ma_encoder_write_pcm_frames(&encoder, pcmf32_mono.data(), pcmf32_mono.size(), &frames_read);
400
+ ma_encoder_uninit(&encoder);
401
+ #endif
402
+
403
+ ma_decoder_uninit(&decoder);
404
+ return true;
405
+ }
406
+
407
+ } // namespace audio_helpers
408
+
409
+ mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len) {
410
+ if (audio_helpers::is_audio_file((const char *)buf, len)) {
411
+ std::vector<float> pcmf32;
412
+ int bitrate = mtmd_get_audio_bitrate(ctx);
413
+ if (bitrate < 0) {
414
+ LOG_ERR("This model does not support audio input\n");
415
+ return nullptr;
416
+ }
417
+ if (!audio_helpers::decode_audio_from_buf(buf, len, bitrate, pcmf32)) {
418
+ LOG_ERR("Unable to read WAV audio file from buffer\n");
419
+ return nullptr;
420
+ }
421
+ return mtmd_bitmap_init_from_audio(pcmf32.size(), pcmf32.data());
422
+ }
423
+
424
+ // otherwise, we assume it's an image
425
+ mtmd_bitmap * result = nullptr;
426
+ {
427
+ int nx, ny, nc;
428
+ auto * data = stbi_load_from_memory(buf, len, &nx, &ny, &nc, 3);
429
+ if (!data) {
430
+ LOG_ERR("%s: failed to decode image bytes\n", __func__);
431
+ return nullptr;
432
+ }
433
+ result = mtmd_bitmap_init(nx, ny, data);
434
+ stbi_image_free(data);
435
+ }
436
+ return result;
437
+ }
438
+
439
+ mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname) {
440
+ std::vector<unsigned char> buf;
441
+ FILE * f = fopen(fname, "rb");
442
+ if (!f) {
443
+ LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno));
444
+ return nullptr;
445
+ }
446
+
447
+ fseek(f, 0, SEEK_END);
448
+ long file_size = ftell(f);
449
+ fseek(f, 0, SEEK_SET);
450
+ buf.resize(file_size);
451
+
452
+ size_t n_read = fread(buf.data(), 1, file_size, f);
453
+ fclose(f);
454
+ if (n_read != (size_t)file_size) {
455
+ LOG_ERR("Failed to read entire file %s", fname);
456
+ return nullptr;
457
+ }
458
+
459
+ return mtmd_helper_bitmap_init_from_buf(ctx, buf.data(), buf.size());
460
+ }
@@ -0,0 +1,91 @@
1
+ #ifndef MTMD_HELPER_H
2
+ #define MTMD_HELPER_H
3
+
4
+ #include "ggml.h"
5
+ #include "llama.h"
6
+ #include "mtmd.h"
7
+
8
+ #include <stddef.h>
9
+ #include <stdint.h>
10
+ #include <stdbool.h>
11
+
12
+ #ifdef __cplusplus
13
+ extern "C" {
14
+ #endif
15
+
16
+ //
17
+ // libmtmd helper functions
18
+ //
19
+ // Please note that these helpers are not guaranteed to be stable.
20
+ // BREAKING CHANGES are expected.
21
+ //
22
+
23
+ // helper function to construct a mtmd_bitmap from a file
24
+ // it calls mtmd_helper_bitmap_init_from_buf() internally
25
+ // returns nullptr on failure
26
+ // this function is thread-safe
27
+ MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname);
28
+
29
+ // helper function to construct a mtmd_bitmap from a buffer containing a file
30
+ // supported formats:
31
+ // image: formats supported by stb_image: jpg, png, bmp, gif, etc.
32
+ // audio: formats supported by miniaudio: wav, mp3, flac
33
+ // note: audio files will be auto-detected based on magic bytes
34
+ // returns nullptr on failure
35
+ // this function is thread-safe
36
+ MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len);
37
+
38
+ // helper to count the total number of tokens from a list of chunks, useful to keep track of KV cache
39
+ MTMD_API size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks);
40
+
41
+ // helper to count the total position of tokens from a list of chunks, useful to keep track of n_past
42
+ // normally, n_pos is equal to n_tokens, but for M-RoPE it is different
43
+ MTMD_API llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks);
44
+
45
+ // helper function that automatically:
46
+ // 1. run llama_decode() on text chunks
47
+ // 2. run mtmd_encode() on image chunks, then mtmd_get_output_embd() and then llama_decode()
48
+ // if any of the mtmd_encode() or llama_decode() calls return non-zero, stop and forward the error
49
+ // otherwise, returns 0 on success
50
+ // this function is NOT thread-safe
51
+ MTMD_API int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
52
+ struct llama_context * lctx,
53
+ const mtmd_input_chunks * chunks,
54
+ llama_pos n_past,
55
+ llama_seq_id seq_id,
56
+ int32_t n_batch,
57
+ bool logits_last,
58
+ llama_pos * new_n_past);
59
+
60
+ // works like mtmd_helper_eval_chunks(), but only for a single chunk
61
+ // this function is NOT thread-safe
62
+ MTMD_API int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
63
+ struct llama_context * lctx,
64
+ const mtmd_input_chunk * chunk,
65
+ llama_pos n_past,
66
+ llama_seq_id seq_id,
67
+ int32_t n_batch,
68
+ bool logits_last,
69
+ llama_pos * new_n_past);
70
+
71
+ // helper function to decode an image whose embeddings have already been calculated
72
+ // this helper will handle batching and pre/post decoding setup (for ex. gemma 3 requires non-causal attention)
73
+ // ret 0 on success, -1 on chunk not being a valid image chunk, 1 on decode failure
74
+ MTMD_API int32_t mtmd_helper_decode_image_chunk(mtmd_context * ctx,
75
+ struct llama_context * lctx,
76
+ const mtmd_input_chunk * chunk,
77
+ float * encoded_embd,
78
+ llama_pos n_past,
79
+ llama_seq_id seq_id,
80
+ int32_t n_batch,
81
+ llama_pos * new_n_past);
82
+
83
+ #ifdef __cplusplus
84
+ } // extern "C"
85
+ #endif
86
+
87
+ //
88
+ // C++ wrappers
89
+ //
90
+
91
+ #endif