cui-llama.rn 1.7.3 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
  4. package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/README.md CHANGED
@@ -55,6 +55,8 @@ For get a GGUF model or quantize manually, see [`Prepare and Quantize`](https://
 
  ## Usage
 
+ > **💡 New!** `llama.rn` now supports **multimodal models** with vision and audio capabilities! See the [Multimodal section](#multimodal-vision--audio) for details.
+
  Load model info only:
 
  ```js
@@ -123,49 +125,162 @@ console.log('Result:', textResult.text)
  console.log('Timings:', textResult.timings)
  ```
 
- The bindings deisgn inspired by [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp:
+ The bindings' design is inspired by the [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp:
 
  - `/completion` and `/chat/completions`: `context.completion(params, partialCompletionCallback)`
  - `/tokenize`: `context.tokenize(content)`
  - `/detokenize`: `context.detokenize(tokens)`
  - `/embedding`: `context.embedding(content)`
+ - `/rerank`: `context.rerank(query, documents, params)`
  - ... Other methods
 
  Please visit the [Documentation](docs/API) for more details.
 
  You can also visit the [example](example) to see how to use it.
 
- ## Session (State)
+ ## Multimodal (Vision & Audio)
 
- The session file is a binary file that contains the state of the context, it can saves time of prompt processing.
+ `llama.rn` supports multimodal capabilities including vision (images) and audio processing. This allows you to interact with models that can understand both text and media content.
+
+ ### Supported Media Formats
+
+ **Images (Vision):**
+ - JPEG, PNG, BMP, GIF, TGA, HDR, PIC, PNM
+ - Base64 encoded images (data URLs)
+ - Local file paths
+ - \* HTTP URLs are not supported yet
+
+ **Audio:**
+ - WAV, MP3 formats
+ - Base64 encoded audio (data URLs)
+ - Local file paths
+ - \* HTTP URLs are not supported yet
+
+ ### Setup
+
+ First, you need a multimodal model and its corresponding multimodal projector (mmproj) file; see [how to obtain mmproj](https://github.com/ggml-org/llama.cpp/tree/master/tools/mtmd#how-to-obtain-mmproj) for more details.
+
+ ### Initialize Multimodal Support
 
  ```js
- const context = await initLlama({ ...params })
+ import { initLlama } from 'llama.rn'
 
- // After prompt processing or completion ...
+ // First initialize the model context
+ const context = await initLlama({
+   model: 'path/to/your/multimodal-model.gguf',
+   n_ctx: 4096,
+   n_gpu_layers: 99, // Recommended for multimodal models
+   // Important: Disable context shifting for multimodal
+   ctx_shift: false,
+ })
 
- // Save the session
- await context.saveSession('<path to save session>')
+ // Initialize multimodal support with mmproj file
+ const success = await context.initMultimodal({
+   path: 'path/to/your/mmproj-model.gguf',
+   use_gpu: true, // Recommended for better performance
+ })
 
- // Load the session
- await context.loadSession('<path to load session>')
+ // Check if multimodal is enabled
+ console.log('Multimodal enabled:', await context.isMultimodalEnabled())
+
+ if (success) {
+   console.log('Multimodal support initialized!')
+
+   // Check what modalities are supported
+   const support = await context.getMultimodalSupport()
+   console.log('Vision support:', support.vision)
+   console.log('Audio support:', support.audio)
+ } else {
+   console.log('Failed to initialize multimodal support')
+ }
+
+ // Release multimodal context
+ await context.releaseMultimodal()
  ```
 
- ## Embedding
+ ### Usage Examples
 
- The embedding API is used to get the embedding of a text.
+ #### Vision (Image Processing)
 
  ```js
- const context = await initLlama({
-   ...params,
-   embedding: true,
+ const result = await context.completion({
+   messages: [
+     {
+       role: 'user',
+       content: [
+         {
+           type: 'text',
+           text: 'What do you see in this image?',
+         },
+         {
+           type: 'image_url',
+           image_url: {
+             url: 'file:///path/to/image.jpg',
+             // or base64: 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAYABgAAD...'
+           },
+         },
+       ],
+     },
+   ],
+   n_predict: 100,
+   temperature: 0.1,
  })
 
- const { embedding } = await context.embedding('Hello, world!')
+ console.log('AI Response:', result.text)
  ```
 
- - You can use model like [nomic-ai/nomic-embed-text-v1.5-GGUF](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF) for better embedding quality.
- - You can use DB like [op-sqlite](https://github.com/OP-Engineering/op-sqlite) with sqlite-vec support to store and search embeddings.
+ #### Audio Processing
+
+ ```js
+ // Method 1: Using structured message content (Recommended)
+ const result = await context.completion({
+   messages: [
+     {
+       role: 'user',
+       content: [
+         {
+           type: 'text',
+           text: 'Transcribe or describe this audio:',
+         },
+         {
+           type: 'input_audio',
+           input_audio: {
+             data: 'data:audio/wav;base64,UklGRiQAAABXQVZFZm10...',
+             // or url: 'file:///path/to/audio.wav',
+             format: 'wav', // or 'mp3'
+           },
+         },
+       ],
+     },
+   ],
+   n_predict: 200,
+ })
+
+ console.log('Transcription:', result.text)
+ ```
+
+ ### Tokenization with Media
+
+ ```js
+ // Tokenize text with media
+ const tokenizeResult = await context.tokenize(
+   'Describe this image: <__media__>',
+   {
+     media_paths: ['file:///path/to/image.jpg']
+   }
+ )
+
+ console.log('Tokens:', tokenizeResult.tokens)
+ console.log('Has media:', tokenizeResult.has_media)
+ console.log('Media positions:', tokenizeResult.chunk_pos_media)
+ ```
+
+ ### Notes
+
+ - **Context Shifting**: Multimodal models require `ctx_shift: false` to maintain media token positioning
+ - **Memory**: Multimodal models require more memory; use adequate `n_ctx` and consider GPU offloading
+ - **Media Markers**: The system automatically handles `<__media__>` markers in prompts. When using structured message content, media items are automatically replaced with this marker (illustrated in the sketch after these notes)
+ - **Model Compatibility**: Ensure your model supports the media type you're trying to process
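
To make the **Media Markers** note concrete: structured content is flattened into a plain prompt in which each media item becomes a `<__media__>` marker, with the media paths collected alongside. A minimal illustrative sketch; `flattenContent` is a hypothetical helper written here for explanation only, the real substitution happens inside `llama.rn`:

```js
// Illustrative only: approximates how structured message content maps onto
// a prompt string with <__media__> markers plus a list of media paths.
function flattenContent(parts) {
  const mediaPaths = []
  const text = parts
    .map((part) => {
      if (part.type === 'text') return part.text
      if (part.type === 'image_url') {
        mediaPaths.push(part.image_url.url)
        return '<__media__>' // each media item becomes a marker
      }
      if (part.type === 'input_audio') {
        mediaPaths.push(part.input_audio.url ?? part.input_audio.data)
        return '<__media__>'
      }
      return ''
    })
    .join(' ')
  return { text, mediaPaths }
}

const { text, mediaPaths } = flattenContent([
  { type: 'text', text: 'Describe this image:' },
  { type: 'image_url', image_url: { url: 'file:///path/to/image.jpg' } },
])
// text       -> 'Describe this image: <__media__>'
// mediaPaths -> ['file:///path/to/image.jpg'], usable with
// context.tokenize(text, { media_paths: mediaPaths })
```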
 
  ## Tool Calling
 
@@ -289,6 +404,91 @@ console.log('Result:', text)
 
  Also, this is how `json_schema` works in `response_format` during completion, it converts the json_schema to gbnf grammar.
 
+ ## Session (State)
+
+ The session file is a binary file that contains the state of the context; it can save time on prompt processing.
+
+ ```js
+ const context = await initLlama({ ...params })
+
+ // After prompt processing or completion ...
+
+ // Save the session
+ await context.saveSession('<path to save session>')
+
+ // Load the session
+ await context.loadSession('<path to load session>')
+ ```
+
+ ### Notes
+
+ - \* Saving state from a multimodal context is not supported yet, so a session only stores the text chunk before the first media chunk.
+
+ ## Embedding
+
+ The embedding API is used to get the embedding of a text.
+
+ ```js
+ const context = await initLlama({
+   ...params,
+   embedding: true,
+ })
+
+ const { embedding } = await context.embedding('Hello, world!')
+ ```
+
+ - You can use a model like [nomic-ai/nomic-embed-text-v1.5-GGUF](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF) for better embedding quality.
+ - You can use a DB like [op-sqlite](https://github.com/OP-Engineering/op-sqlite) with sqlite-vec support to store and search embeddings.
+
+ ## Rerank
+
+ The rerank API is used to rank documents based on their relevance to a query. This is particularly useful for improving search results and implementing retrieval-augmented generation (RAG) systems.
+
+ ```js
+ const context = await initLlama({
+   ...params,
+   embedding: true, // Required for reranking
+   pooling_type: 'rank', // Use rank pooling for rerank models
+ })
+
+ // Rerank documents based on relevance to query
+ const results = await context.rerank(
+   'What is artificial intelligence?', // query
+   [
+     'AI is a branch of computer science.',
+     'The weather is nice today.',
+     'Machine learning is a subset of AI.',
+     'I like pizza.',
+   ], // documents to rank
+   {
+     normalize: 1, // Optional: normalize scores (default: from model config)
+   }
+ )
+
+ // Results are automatically sorted by score (highest first)
+ results.forEach((result, index) => {
+   console.log(`Rank ${index + 1}:`, {
+     score: result.score,
+     document: result.document,
+     originalIndex: result.index,
+   })
+ })
+ ```
+
+ ### Notes
+
+ - **Model Requirements**: Reranking requires models with `RANK` pooling type (e.g., reranker models)
+ - **Embedding Enabled**: The context must have `embedding: true` to use rerank functionality
+ - **Automatic Sorting**: Results are returned sorted by relevance score in descending order
+ - **Document Access**: Each result includes the original document text and its index in the input array
+ - **Score Interpretation**: Higher scores indicate higher relevance to the query
+
+ ### Recommended Models
+
+ - [jinaai - jina-reranker-v2-base-multilingual-GGUF](https://huggingface.co/gpustack/jina-reranker-v2-base-multilingual-GGUF)
+ - [BAAI - bge-reranker-v2-m3-GGUF](https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF)
+ - Other models with "rerank" or "reranker" in their name and GGUF format
+
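
Since the README positions rerank as a building block for RAG, a short end-to-end sketch may help. It only combines the `rerank` and `completion` APIs shown above; note that reranking needs an `embedding: true` context while generation uses a regular one, so two contexts are assumed here, and the top-k cutoff and prompt wording are arbitrary choices, not part of the library:

```js
// RAG-style sketch: score candidate documents with a rerank context,
// then answer with a separate chat context using only the top hits.
async function answerWithRerank(rerankContext, chatContext, query, documents, topK = 2) {
  const ranked = await rerankContext.rerank(query, documents) // sorted, highest score first
  const selected = ranked.slice(0, topK).map((r) => r.document)

  const result = await chatContext.completion({
    messages: [
      {
        role: 'user',
        content: `Answer using only this context:\n${selected.join('\n')}\n\nQuestion: ${query}`,
      },
    ],
    n_predict: 200,
  })
  return result.text
}
```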
 
  ## Mock `llama.rn`
 
  We have provided a mock version of `llama.rn` for testing purpose you can use on Jest:
package/android/src/main/CMakeLists.txt CHANGED
@@ -27,12 +27,11 @@ set(
    ${RNLLAMA_LIB_DIR}/ggml-cpu/amx/mmq.cpp
    ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu.c
    ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu.cpp
-   ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu-aarch64.cpp
-   ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu-quants.c
-   ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu-traits.cpp
+   ${RNLLAMA_LIB_DIR}/ggml-cpu/quants.c
+   ${RNLLAMA_LIB_DIR}/ggml-cpu/traits.cpp
+   ${RNLLAMA_LIB_DIR}/ggml-cpu/repack.cpp
    ${RNLLAMA_LIB_DIR}/ggml-cpu/unary-ops.cpp
    ${RNLLAMA_LIB_DIR}/ggml-cpu/binary-ops.cpp
-   ${RNLLAMA_LIB_DIR}/ggml-cpu/sgemm.cpp
    ${RNLLAMA_LIB_DIR}/ggml-cpu/vec.cpp
    ${RNLLAMA_LIB_DIR}/ggml-cpu/ops.cpp
    ${RNLLAMA_LIB_DIR}/ggml-opt.cpp
@@ -41,6 +40,9 @@ set(
    ${RNLLAMA_LIB_DIR}/gguf.cpp
    ${RNLLAMA_LIB_DIR}/log.cpp
    ${RNLLAMA_LIB_DIR}/llama-impl.cpp
+   ${RNLLAMA_LIB_DIR}/chat-parser.cpp
+   ${RNLLAMA_LIB_DIR}/json-partial.cpp
+   ${RNLLAMA_LIB_DIR}/regex-partial.cpp
    # Multimodal support
    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd.cpp
    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd-audio.cpp
@@ -52,7 +54,6 @@ set(
    ${RNLLAMA_LIB_DIR}/llama-adapter.cpp
    ${RNLLAMA_LIB_DIR}/llama-chat.cpp
    ${RNLLAMA_LIB_DIR}/llama-context.cpp
-   ${RNLLAMA_LIB_DIR}/llama-kv-cache.cpp
    ${RNLLAMA_LIB_DIR}/llama-arch.cpp
    ${RNLLAMA_LIB_DIR}/llama-batch.cpp
    ${RNLLAMA_LIB_DIR}/llama-cparams.cpp
@@ -60,6 +61,10 @@ set(
    ${RNLLAMA_LIB_DIR}/llama.cpp
    ${RNLLAMA_LIB_DIR}/llama-model.cpp
    ${RNLLAMA_LIB_DIR}/llama-model-loader.cpp
+   ${RNLLAMA_LIB_DIR}/llama-kv-cache-unified.cpp
+   ${RNLLAMA_LIB_DIR}/llama-kv-cache-unified-iswa.cpp
+   ${RNLLAMA_LIB_DIR}/llama-memory-hybrid.cpp
+   ${RNLLAMA_LIB_DIR}/llama-memory-recurrent.cpp
    ${RNLLAMA_LIB_DIR}/llama-mmap.cpp
    ${RNLLAMA_LIB_DIR}/llama-vocab.cpp
    ${RNLLAMA_LIB_DIR}/llama-memory.cpp
@@ -71,7 +76,8 @@ set(
    ${RNLLAMA_LIB_DIR}/common.cpp
    ${RNLLAMA_LIB_DIR}/chat.cpp
    ${RNLLAMA_LIB_DIR}/json-schema-to-grammar.cpp
-   ${RNLLAMA_LIB_DIR}/json.hpp
+   ${RNLLAMA_LIB_DIR}/nlohmann/json.hpp
+   ${RNLLAMA_LIB_DIR}/nlohmann/json_fwd.hpp
    ${RNLLAMA_LIB_DIR}/minja/minja.hpp
    ${RNLLAMA_LIB_DIR}/minja/chat-template.hpp
    ${RNLLAMA_LIB_DIR}/rn-llama.cpp
@@ -81,16 +87,28 @@ set(
 
  find_library(LOG_LIB log)
 
- function(build_library target_name cpu_flags)
+ function(build_library target_name arch cpu_flags)
+   if (NOT ${arch} STREQUAL "generic")
+     set(SOURCE_FILES_ARCH
+       ${RNLLAMA_LIB_DIR}/ggml-cpu/arch/${arch}/quants.c
+       ${RNLLAMA_LIB_DIR}/ggml-cpu/arch/${arch}/repack.cpp
+     )
+   endif ()
+
    add_library(
      ${target_name}
      SHARED
      ${SOURCE_FILES}
+     ${SOURCE_FILES_ARCH}
    )
 
    target_link_libraries(${target_name} ${LOG_LIB} android)
 
-   target_compile_options(${target_name} PRIVATE -DLM_GGML_USE_CPU -DLM_GGML_USE_CPU_AARCH64 -DRNLLAMA_USE_FD_FILE -pthread ${cpu_flags})
+   if (${arch} STREQUAL "generic")
+     target_compile_options(${target_name} PRIVATE -DLM_GGML_CPU_GENERIC)
+   endif ()
+
+   target_compile_options(${target_name} PRIVATE -DLM_GGML_USE_CPU -DLM_GGML_USE_CPU_REPACK -DRNLLAMA_USE_FD_FILE -pthread ${cpu_flags})
 
    if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
      target_compile_options(${target_name} PRIVATE -DRNLLAMA_ANDROID_ENABLE_LOGGING)
@@ -111,17 +129,17 @@ endfunction()
 
 
  # Default target (no specific CPU features)
- build_library("rnllama" "")
+ build_library("rnllama" "generic" "")
 
  if (${ANDROID_ABI} STREQUAL "arm64-v8a")
    # ARM64 targets
    # Removing fp16 for now as it leads to issues with some models like deepseek r1 distills
    # https://github.com/mybigday/llama.rn/pull/110#issuecomment-2609918310
-   build_library("rnllama_v8" "-march=armv8-a")
-   build_library("rnllama_v8_2" "-march=armv8.2-a")
-   build_library("rnllama_v8_2_dotprod" "-march=armv8.2-a+dotprod")
-   build_library("rnllama_v8_2_i8mm" "-march=armv8.2-a+i8mm")
-   build_library("rnllama_v8_2_dotprod_i8mm" "-march=armv8.2-a+dotprod+i8mm")
+   build_library("rnllama_v8" "arm" "-march=armv8-a")
+   build_library("rnllama_v8_2" "arm" "-march=armv8.2-a")
+   build_library("rnllama_v8_2_dotprod" "arm" "-march=armv8.2-a+dotprod")
+   build_library("rnllama_v8_2_i8mm" "arm" "-march=armv8.2-a+i8mm")
+   build_library("rnllama_v8_2_dotprod_i8mm" "arm" "-march=armv8.2-a+dotprod+i8mm")
 
  # https://github.com/ggerganov/llama.cpp/blob/master/docs/android.md#cross-compile-using-android-ndk
  # llama.cpp will deal with the cpu features
@@ -131,5 +149,6 @@ if (${ANDROID_ABI} STREQUAL "arm64-v8a")
 
  elseif (${ANDROID_ABI} STREQUAL "x86_64")
    # x86_64 target
-   build_library("rnllama_x86_64" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
+   build_library("rnllama_x86_64" "x86" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
+
  endif ()
package/android/src/main/java/com/rnllama/LlamaContext.java CHANGED
@@ -69,7 +69,11 @@ public class LlamaContext {
    try {
      if (filepath.startsWith("content")) {
        Uri uri = Uri.parse(filepath);
-       reactContext.getApplicationContext().getContentResolver().takePersistableUriPermission(uri, Intent.FLAG_GRANT_READ_URI_PERMISSION);
+       try {
+         reactContext.getApplicationContext().getContentResolver().takePersistableUriPermission(uri, Intent.FLAG_GRANT_READ_URI_PERMISSION);
+       } catch (SecurityException e) {
+         Log.w(NAME, "Persistable permission not granted for URI: " + uri);
+       }
        fis = reactContext.getApplicationContext().getContentResolver().openInputStream(uri);
      } else {
        fis = new FileInputStream(filepath);
@@ -107,7 +111,11 @@ public class LlamaContext {
    }
 
    String modelName = params.getString("model");
-
+
+   if(!isGGUF(modelName, reactContext)) {
+     throw new IllegalArgumentException("File is not in GGUF format");
+   }
+
    if (modelName.startsWith("content://")) {
      Uri uri = Uri.parse(modelName);
      try {
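
The new `isGGUF` guard rejects non-GGUF model files before any native loading. For reference, GGUF files are identified by the four ASCII magic bytes `GGUF` at offset 0, so the core of such a check is tiny; a minimal sketch in Node-style JavaScript (the package's Java implementation presumably also handles `content://` URIs, as the surrounding code suggests):

```js
const fs = require('fs')

// Returns true if the file starts with the GGUF magic bytes 'G','G','U','F'.
function isGGUF(filepath) {
  const buf = Buffer.alloc(4)
  const fd = fs.openSync(filepath, 'r')
  try {
    fs.readSync(fd, buf, 0, 4, 0)
  } finally {
    fs.closeSync(fd)
  }
  return buf.toString('ascii') === 'GGUF'
}
```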
@@ -117,7 +125,6 @@ public class LlamaContext {
        Log.e(NAME, "Failed to convert to FD!");
      }
    }
-
 
    // Check if file has GGUF magic numbers
    this.id = id;
@@ -127,8 +134,6 @@ public class LlamaContext {
      modelName,
      // String chat_template,
      params.hasKey("chat_template") ? params.getString("chat_template") : "",
-     // String reasoning_format,
-     params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
      // boolean embedding,
      params.hasKey("embedding") ? params.getBoolean("embedding") : false,
      // int embd_normalize,
@@ -200,6 +205,7 @@ public class LlamaContext {
    String tools = params.hasKey("tools") ? params.getString("tools") : "";
    Boolean parallelToolCalls = params.hasKey("parallel_tool_calls") ? params.getBoolean("parallel_tool_calls") : false;
    String toolChoice = params.hasKey("tool_choice") ? params.getString("tool_choice") : "";
+   Boolean enableThinking = params.hasKey("enable_thinking") ? params.getBoolean("enable_thinking") : false;
    return getFormattedChatWithJinja(
      this.context,
      messages,
@@ -207,7 +213,8 @@ public class LlamaContext {
      jsonSchema,
      tools,
      parallelToolCalls,
-     toolChoice
+     toolChoice,
+     enableThinking
    );
  }
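
The `enableThinking` flag above is read from an `enable_thinking` param and forwarded to `getFormattedChatWithJinja`. From JavaScript this presumably surfaces as an option on chat formatting; a hedged sketch (the exact TS option placement and `getFormattedChat` signature are assumptions based on the `params.hasKey("enable_thinking")` read above):

```js
// Assumption: the TS layer forwards enable_thinking through to Jinja formatting.
const formatted = await context.getFormattedChat(messages, null, {
  jinja: true,
  enable_thinking: true, // lets reasoning-capable templates open a think block
})
```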
 
@@ -296,12 +303,25 @@ public class LlamaContext {
      }
    }
 
+   int[] guide_tokens = null;
+   if (params.hasKey("guide_tokens")) {
+     ReadableArray guide_tokens_array = params.getArray("guide_tokens");
+     guide_tokens = new int[guide_tokens_array.size()];
+     for (int i = 0; i < guide_tokens_array.size(); i++) {
+       guide_tokens[i] = (int) guide_tokens_array.getDouble(i);
+     }
+   }
+
    WritableMap result = doCompletion(
      this.context,
      // String prompt,
      params.getString("prompt"),
+     // int[] guide_tokens,
+     guide_tokens,
      // int chat_format,
      params.hasKey("chat_format") ? params.getInt("chat_format") : 0,
+     // String reasoning_format,
+     params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
      // String grammar,
      params.hasKey("grammar") ? params.getString("grammar") : "",
      // String json_schema,
@@ -312,6 +332,8 @@ public class LlamaContext {
      params.hasKey("grammar_triggers") ? params.getArray("grammar_triggers") : null,
      // ReadableArray preserved_tokens,
      params.hasKey("preserved_tokens") ? params.getArray("preserved_tokens") : null,
+     // boolean thinking_forced_open,
+     params.hasKey("thinking_forced_open") ? params.getBoolean("thinking_forced_open") : false,
      // float temperature,
      params.hasKey("temperature") ? (float) params.getDouble("temperature") : 0.7f,
      // int n_threads,
@@ -416,6 +438,27 @@ public class LlamaContext {
    return result;
  }
 
+ public WritableArray getRerank(String query, ReadableArray documents, ReadableMap params) {
+   if (isEmbeddingEnabled(this.context) == false) {
+     throw new IllegalStateException("Embedding is not enabled but required for reranking");
+   }
+
+   // Convert ReadableArray to Java string array
+   String[] documentsArray = new String[documents.size()];
+   for (int i = 0; i < documents.size(); i++) {
+     documentsArray[i] = documents.getString(i);
+   }
+
+   WritableArray result = rerank(
+     this.context,
+     query,
+     documentsArray,
+     // int normalize,
+     params.hasKey("normalize") ? params.getInt("normalize") : -1
+   );
+   return result;
+ }
+
  public String bench(int pp, int tg, int pl, int nr) {
    return bench(this.context, pp, tg, pl, nr);
  }
@@ -442,6 +485,11 @@ public class LlamaContext {
    if (mmprojPath == null || mmprojPath.isEmpty()) {
      throw new IllegalArgumentException("mmproj_path is empty");
    }
+
+   if(!isGGUF(mmprojPath, this.reactContext)) {
+     throw new IllegalArgumentException("File is not in GGUF format");
+   }
+
    File file = new File(mmprojPath);
    if (!mmprojPath.startsWith("content") && !file.exists()) {
      throw new IllegalArgumentException("mmproj file does not exist: " + mmprojPath);
@@ -475,6 +523,34 @@ public class LlamaContext {
    releaseMultimodal(this.context);
  }
 
+ public boolean initVocoder(String vocoderModelPath) {
+   return initVocoder(this.context, vocoderModelPath);
+ }
+
+ public boolean isVocoderEnabled() {
+   return isVocoderEnabled(this.context);
+ }
+
+ public String getFormattedAudioCompletion(String speakerJsonStr, String textToSpeak) {
+   return getFormattedAudioCompletion(this.context, speakerJsonStr, textToSpeak);
+ }
+
+ public WritableArray getAudioCompletionGuideTokens(String textToSpeak) {
+   return getAudioCompletionGuideTokens(this.context, textToSpeak);
+ }
+
+ public WritableArray decodeAudioTokens(ReadableArray tokens) {
+   int[] toks = new int[tokens.size()];
+   for (int i = 0; i < tokens.size(); i++) {
+     toks[i] = (int) tokens.getDouble(i);
+   }
+   return decodeAudioTokens(this.context, toks);
+ }
+
+ public void releaseVocoder() {
+   releaseVocoder(this.context);
+ }
+
  public void release() {
    freeContext(context);
  }
@@ -576,7 +652,6 @@ public class LlamaContext {
  protected static native long initContext(
    String model_path,
    String chat_template,
-   String reasoning_format,
    boolean embedding,
    int embd_normalize,
    int n_ctx,
@@ -613,7 +688,8 @@ public class LlamaContext {
    String jsonSchema,
    String tools,
    boolean parallelToolCalls,
-   String toolChoice
+   String toolChoice,
+   boolean enableThinking
  );
  protected static native String getFormattedChat(
    long contextPtr,
@@ -632,12 +708,15 @@ public class LlamaContext {
  protected static native WritableMap doCompletion(
    long context_ptr,
    String prompt,
+   int[] guide_tokens,
    int chat_format,
+   String reasoning_format,
    String grammar,
    String json_schema,
    boolean grammar_lazy,
    ReadableArray grammar_triggers,
    ReadableArray preserved_tokens,
+   boolean thinking_forced_open,
    float temperature,
    int n_threads,
    int n_predict,
@@ -678,6 +757,7 @@ public class LlamaContext {
    String text,
    int embd_normalize
  );
+ protected static native WritableArray rerank(long contextPtr, String query, String[] documents, int normalize);
  protected static native String bench(long contextPtr, int pp, int tg, int pl, int nr);
  protected static native int applyLoraAdapters(long contextPtr, ReadableArray loraAdapters);
  protected static native void removeLoraAdapters(long contextPtr);
@@ -686,4 +766,10 @@ public class LlamaContext {
  protected static native void setupLog(NativeLogCallback logCallback);
  protected static native void unsetLog();
  protected static native void releaseMultimodal(long contextPtr);
+ protected static native boolean isVocoderEnabled(long contextPtr);
+ protected static native String getFormattedAudioCompletion(long contextPtr, String speakerJsonStr, String textToSpeak);
+ protected static native WritableArray getAudioCompletionGuideTokens(long contextPtr, String textToSpeak);
+ protected static native WritableArray decodeAudioTokens(long contextPtr, int[] tokens);
+ protected static native boolean initVocoder(long contextPtr, String vocoderModelPath);
+ protected static native void releaseVocoder(long contextPtr);
  }
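
Taken together, the vocoder natives and the new `guide_tokens` completion parameter outline a TTS pipeline: load a vocoder model, format an audio completion prompt for a speaker, derive guide tokens for the text, generate, then decode the audio tokens into samples. A hedged end-to-end sketch in JS; the method names mirror the natives above, but the exact TS wrappers, the parameter shapes, and the `audio_tokens` result field are assumptions:

```js
// Hypothetical TTS flow assembled from the vocoder natives above.
async function speak(context, speakerJson, textToSpeak) {
  await context.initVocoder('path/to/vocoder-model.gguf')

  // Build the TTS prompt and the guide tokens that steer generation.
  const prompt = await context.getFormattedAudioCompletion(speakerJson, textToSpeak)
  const guideTokens = await context.getAudioCompletionGuideTokens(textToSpeak)

  // Run completion; assumed: generated audio tokens come back on the result.
  const result = await context.completion({
    prompt,
    guide_tokens: guideTokens,
  })

  // Decode audio tokens into audio samples, then free the vocoder.
  const samples = await context.decodeAudioTokens(result.audio_tokens)
  await context.releaseVocoder()
  return samples
}
```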