cui-llama.rn 1.7.4 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +79 -5
  4. package/android/src/main/java/com/rnllama/RNLlama.java +237 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/README.md CHANGED
@@ -55,6 +55,8 @@ For get a GGUF model or quantize manually, see [`Prepare and Quantize`](https://

  ## Usage

+ > **💡 New!** `llama.rn` now supports **multimodal models** with vision and audio capabilities! See the [Multimodal section](#multimodal-vision--audio) for details.
+
  Load model info only:

  ```js
@@ -123,49 +125,162 @@ console.log('Result:', textResult.text)
  console.log('Timings:', textResult.timings)
  ```

- The bindings deisgn inspired by [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp:
+ The binding's deisgn inspired by [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp:

  - `/completion` and `/chat/completions`: `context.completion(params, partialCompletionCallback)`
  - `/tokenize`: `context.tokenize(content)`
  - `/detokenize`: `context.detokenize(tokens)`
  - `/embedding`: `context.embedding(content)`
+ - `/rerank`: `context.rerank(query, documents, params)`
  - ... Other methods

  Please visit the [Documentation](docs/API) for more details.

  You can also visit the [example](example) to see how to use it.

- ## Session (State)
+ ## Multimodal (Vision & Audio)

- The session file is a binary file that contains the state of the context, it can saves time of prompt processing.
+ `llama.rn` supports multimodal capabilities including vision (images) and audio processing. This allows you to interact with models that can understand both text and media content.
+
+ ### Supported Media Formats
+
+ **Images (Vision):**
+ - JPEG, PNG, BMP, GIF, TGA, HDR, PIC, PNM
+ - Base64 encoded images (data URLs)
+ - Local file paths
+ - \* Not supported HTTP URLs yet
+
+ **Audio:**
+ - WAV, MP3 formats
+ - Base64 encoded audio (data URLs)
+ - Local file paths
+ - \* Not supported HTTP URLs yet
+
+ ### Setup
+
+ First, you need a multimodal model and its corresponding multimodal projector (mmproj) file, see [how to obtain mmproj](https://github.com/ggml-org/llama.cpp/tree/master/tools/mtmd#how-to-obtain-mmproj) for more details.
+
+ ### Initialize Multimodal Support

  ```js
- const context = await initLlama({ ...params })
+ import { initLlama } from 'llama.rn'

- // After prompt processing or completion ...
+ // First initialize the model context
+ const context = await initLlama({
+   model: 'path/to/your/multimodal-model.gguf',
+   n_ctx: 4096,
+   n_gpu_layers: 99, // Recommended for multimodal models
+   // Important: Disable context shifting for multimodal
+   ctx_shift: false,
+ })

- // Save the session
- await context.saveSession('<path to save session>')
+ // Initialize multimodal support with mmproj file
+ const success = await context.initMultimodal({
+   path: 'path/to/your/mmproj-model.gguf',
+   use_gpu: true, // Recommended for better performance
+ })

- // Load the session
- await context.loadSession('<path to load session>')
+ // Check if multimodal is enabled
+ console.log('Multimodal enabled:', await context.isMultimodalEnabled())
+
+ if (success) {
+   console.log('Multimodal support initialized!')
+
+   // Check what modalities are supported
+   const support = await context.getMultimodalSupport()
+   console.log('Vision support:', support.vision)
+   console.log('Audio support:', support.audio)
+ } else {
+   console.log('Failed to initialize multimodal support')
+ }
+
+ // Release multimodal context
+ await context.releaseMultimodal()
  ```

- ## Embedding
+ ### Usage Examples

- The embedding API is used to get the embedding of a text.
+ #### Vision (Image Processing)

  ```js
- const context = await initLlama({
-   ...params,
-   embedding: true,
+ const result = await context.completion({
+   messages: [
+     {
+       role: 'user',
+       content: [
+         {
+           type: 'text',
+           text: 'What do you see in this image?',
+         },
+         {
+           type: 'image_url',
+           image_url: {
+             url: 'file:///path/to/image.jpg',
+             // or base64: 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAYABgAAD...'
+           },
+         },
+       ],
+     },
+   ],
+   n_predict: 100,
+   temperature: 0.1,
  })

- const { embedding } = await context.embedding('Hello, world!')
+ console.log('AI Response:', result.text)
  ```

- - You can use model like [nomic-ai/nomic-embed-text-v1.5-GGUF](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF) for better embedding quality.
- - You can use DB like [op-sqlite](https://github.com/OP-Engineering/op-sqlite) with sqlite-vec support to store and search embeddings.
+ #### Audio Processing
+
+ ```js
+ // Method 1: Using structured message content (Recommended)
+ const result = await context.completion({
+   messages: [
+     {
+       role: 'user',
+       content: [
+         {
+           type: 'text',
+           text: 'Transcribe or describe this audio:',
+         },
+         {
+           type: 'input_audio',
+           input_audio: {
+             data: 'data:audio/wav;base64,UklGRiQAAABXQVZFZm10...',
+             // or url: 'file:///path/to/audio.wav',
+             format: 'wav', // or 'mp3'
+           },
+         },
+       ],
+     },
+   ],
+   n_predict: 200,
+ })
+
+ console.log('Transcription:', result.text)
+ ```
+
+ ### Tokenization with Media
+
+ ```js
+ // Tokenize text with media
+ const tokenizeResult = await context.tokenize(
+   'Describe this image: <__media__>',
+   {
+     media_paths: ['file:///path/to/image.jpg']
+   }
+ )
+
+ console.log('Tokens:', tokenizeResult.tokens)
+ console.log('Has media:', tokenizeResult.has_media)
+ console.log('Media positions:', tokenizeResult.chunk_pos_media)
+ ```
+
+ ### Notes
+
+ - **Context Shifting**: Multimodal models require `ctx_shift: false` to maintain media token positioning
+ - **Memory**: Multimodal models require more memory; use adequate `n_ctx` and consider GPU offloading
+ - **Media Markers**: The system automatically handles `<__media__>` markers in prompts. When using structured message content, media items are automatically replaced with this marker
+ - **Model Compatibility**: Ensure your model supports the media type you're trying to process

  ## Tool Calling

@@ -289,6 +404,91 @@ console.log('Result:', text)

  Also, this is how `json_schema` works in `response_format` during completion, it converts the json_schema to gbnf grammar.

+ ## Session (State)
+
+ The session file is a binary file that contains the state of the context, it can saves time of prompt processing.
+
+ ```js
+ const context = await initLlama({ ...params })
+
+ // After prompt processing or completion ...
+
+ // Save the session
+ await context.saveSession('<path to save session>')
+
+ // Load the session
+ await context.loadSession('<path to load session>')
+ ```
+
+ ### Notes
+
+ - \* Session is currently not supported save state from multimodal context, so it only stores the text chunk before the first media chunk.
+
+ ## Embedding
+
+ The embedding API is used to get the embedding of a text.
+
+ ```js
+ const context = await initLlama({
+   ...params,
+   embedding: true,
+ })
+
+ const { embedding } = await context.embedding('Hello, world!')
+ ```
+
+ - You can use model like [nomic-ai/nomic-embed-text-v1.5-GGUF](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF) for better embedding quality.
+ - You can use DB like [op-sqlite](https://github.com/OP-Engineering/op-sqlite) with sqlite-vec support to store and search embeddings.
+
+ ## Rerank
+
+ The rerank API is used to rank documents based on their relevance to a query. This is particularly useful for improving search results and implementing retrieval-augmented generation (RAG) systems.
+
+ ```js
+ const context = await initLlama({
+   ...params,
+   embedding: true, // Required for reranking
+   pooling_type: 'rank', // Use rank pooling for rerank models
+ })
+
+ // Rerank documents based on relevance to query
+ const results = await context.rerank(
+   'What is artificial intelligence?', // query
+   [
+     'AI is a branch of computer science.',
+     'The weather is nice today.',
+     'Machine learning is a subset of AI.',
+     'I like pizza.',
+   ], // documents to rank
+   {
+     normalize: 1, // Optional: normalize scores (default: from model config)
+   }
+ )
+
+ // Results are automatically sorted by score (highest first)
+ results.forEach((result, index) => {
+   console.log(`Rank ${index + 1}:`, {
+     score: result.score,
+     document: result.document,
+     originalIndex: result.index,
+   })
+ })
+ ```
+
+ ### Notes
+
+ - **Model Requirements**: Reranking requires models with `RANK` pooling type (e.g., reranker models)
+ - **Embedding Enabled**: The context must have `embedding: true` to use rerank functionality
+ - **Automatic Sorting**: Results are returned sorted by relevance score in descending order
+ - **Document Access**: Each result includes the original document text and its index in the input array
+ - **Score Interpretation**: Higher scores indicate higher relevance to the query
+
+ ### Recommended Models
+
+ - [jinaai - jina-reranker-v2-base-multilingual-GGUF](https://huggingface.co/gpustack/jina-reranker-v2-base-multilingual-GGUF)
+ - [BAAI - bge-reranker-v2-m3-GGUF](https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF)
+ - Other models with "rerank" or "reranker" in their name and GGUF format
+
  ## Mock `llama.rn`

  We have provided a mock version of `llama.rn` for testing purpose you can use on Jest:
package/android/src/main/CMakeLists.txt CHANGED
@@ -27,12 +27,11 @@ set(
      ${RNLLAMA_LIB_DIR}/ggml-cpu/amx/mmq.cpp
      ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu.c
      ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu.cpp
-     ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu-aarch64.cpp
-     ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu-quants.c
-     ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu-traits.cpp
+     ${RNLLAMA_LIB_DIR}/ggml-cpu/quants.c
+     ${RNLLAMA_LIB_DIR}/ggml-cpu/traits.cpp
+     ${RNLLAMA_LIB_DIR}/ggml-cpu/repack.cpp
      ${RNLLAMA_LIB_DIR}/ggml-cpu/unary-ops.cpp
      ${RNLLAMA_LIB_DIR}/ggml-cpu/binary-ops.cpp
-     ${RNLLAMA_LIB_DIR}/ggml-cpu/sgemm.cpp
      ${RNLLAMA_LIB_DIR}/ggml-cpu/vec.cpp
      ${RNLLAMA_LIB_DIR}/ggml-cpu/ops.cpp
      ${RNLLAMA_LIB_DIR}/ggml-opt.cpp
@@ -41,6 +40,9 @@ set(
      ${RNLLAMA_LIB_DIR}/gguf.cpp
      ${RNLLAMA_LIB_DIR}/log.cpp
      ${RNLLAMA_LIB_DIR}/llama-impl.cpp
+     ${RNLLAMA_LIB_DIR}/chat-parser.cpp
+     ${RNLLAMA_LIB_DIR}/json-partial.cpp
+     ${RNLLAMA_LIB_DIR}/regex-partial.cpp
      # Multimodal support
      ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd.cpp
      ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd-audio.cpp
@@ -52,7 +54,6 @@ set(
      ${RNLLAMA_LIB_DIR}/llama-adapter.cpp
      ${RNLLAMA_LIB_DIR}/llama-chat.cpp
      ${RNLLAMA_LIB_DIR}/llama-context.cpp
-     ${RNLLAMA_LIB_DIR}/llama-kv-cache.cpp
      ${RNLLAMA_LIB_DIR}/llama-arch.cpp
      ${RNLLAMA_LIB_DIR}/llama-batch.cpp
      ${RNLLAMA_LIB_DIR}/llama-cparams.cpp
@@ -60,6 +61,10 @@ set(
      ${RNLLAMA_LIB_DIR}/llama.cpp
      ${RNLLAMA_LIB_DIR}/llama-model.cpp
      ${RNLLAMA_LIB_DIR}/llama-model-loader.cpp
+     ${RNLLAMA_LIB_DIR}/llama-kv-cache-unified.cpp
+     ${RNLLAMA_LIB_DIR}/llama-kv-cache-unified-iswa.cpp
+     ${RNLLAMA_LIB_DIR}/llama-memory-hybrid.cpp
+     ${RNLLAMA_LIB_DIR}/llama-memory-recurrent.cpp
      ${RNLLAMA_LIB_DIR}/llama-mmap.cpp
      ${RNLLAMA_LIB_DIR}/llama-vocab.cpp
      ${RNLLAMA_LIB_DIR}/llama-memory.cpp
@@ -71,7 +76,8 @@ set(
      ${RNLLAMA_LIB_DIR}/common.cpp
      ${RNLLAMA_LIB_DIR}/chat.cpp
      ${RNLLAMA_LIB_DIR}/json-schema-to-grammar.cpp
-     ${RNLLAMA_LIB_DIR}/json.hpp
+     ${RNLLAMA_LIB_DIR}/nlohmann/json.hpp
+     ${RNLLAMA_LIB_DIR}/nlohmann/json_fwd.hpp
      ${RNLLAMA_LIB_DIR}/minja/minja.hpp
      ${RNLLAMA_LIB_DIR}/minja/chat-template.hpp
      ${RNLLAMA_LIB_DIR}/rn-llama.cpp
@@ -81,16 +87,28 @@ set(

  find_library(LOG_LIB log)

- function(build_library target_name cpu_flags)
+ function(build_library target_name arch cpu_flags)
+     if (NOT ${arch} STREQUAL "generic")
+         set(SOURCE_FILES_ARCH
+             ${RNLLAMA_LIB_DIR}/ggml-cpu/arch/${arch}/quants.c
+             ${RNLLAMA_LIB_DIR}/ggml-cpu/arch/${arch}/repack.cpp
+         )
+     endif ()
+
      add_library(
          ${target_name}
          SHARED
          ${SOURCE_FILES}
+         ${SOURCE_FILES_ARCH}
      )

      target_link_libraries(${target_name} ${LOG_LIB} android)

-     target_compile_options(${target_name} PRIVATE -DLM_GGML_USE_CPU -DLM_GGML_USE_CPU_AARCH64 -DRNLLAMA_USE_FD_FILE -pthread ${cpu_flags})
+     if (${arch} STREQUAL "generic")
+         target_compile_options(${target_name} PRIVATE -DLM_GGML_CPU_GENERIC)
+     endif ()
+
+     target_compile_options(${target_name} PRIVATE -DLM_GGML_USE_CPU -DLM_GGML_USE_CPU_REPACK -DRNLLAMA_USE_FD_FILE -pthread ${cpu_flags})

      if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
          target_compile_options(${target_name} PRIVATE -DRNLLAMA_ANDROID_ENABLE_LOGGING)
@@ -111,17 +129,17 @@ endfunction()


  # Default target (no specific CPU features)
- build_library("rnllama" "")
+ build_library("rnllama" "generic" "")

  if (${ANDROID_ABI} STREQUAL "arm64-v8a")
      # ARM64 targets
      # Removing fp16 for now as it leads to issues with some models like deepseek r1 distills
      # https://github.com/mybigday/llama.rn/pull/110#issuecomment-2609918310
-     build_library("rnllama_v8" "-march=armv8-a")
-     build_library("rnllama_v8_2" "-march=armv8.2-a")
-     build_library("rnllama_v8_2_dotprod" "-march=armv8.2-a+dotprod")
-     build_library("rnllama_v8_2_i8mm" "-march=armv8.2-a+i8mm")
-     build_library("rnllama_v8_2_dotprod_i8mm" "-march=armv8.2-a+dotprod+i8mm")
+     build_library("rnllama_v8" "arm" "-march=armv8-a")
+     build_library("rnllama_v8_2" "arm" "-march=armv8.2-a")
+     build_library("rnllama_v8_2_dotprod" "arm" "-march=armv8.2-a+dotprod")
+     build_library("rnllama_v8_2_i8mm" "arm" "-march=armv8.2-a+i8mm")
+     build_library("rnllama_v8_2_dotprod_i8mm" "arm" "-march=armv8.2-a+dotprod+i8mm")

      # https://github.com/ggerganov/llama.cpp/blob/master/docs/android.md#cross-compile-using-android-ndk
      # llama.cpp will deal with the cpu features
@@ -131,5 +149,6 @@ if (${ANDROID_ABI} STREQUAL "arm64-v8a")

  elseif (${ANDROID_ABI} STREQUAL "x86_64")
      # x86_64 target
-     build_library("rnllama_x86_64" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
+     build_library("rnllama_x86_64" "x86" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
+
  endif ()
package/android/src/main/java/com/rnllama/LlamaContext.java CHANGED
@@ -134,8 +134,6 @@ public class LlamaContext {
        modelName,
        // String chat_template,
        params.hasKey("chat_template") ? params.getString("chat_template") : "",
-       // String reasoning_format,
-       params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
        // boolean embedding,
        params.hasKey("embedding") ? params.getBoolean("embedding") : false,
        // int embd_normalize,
@@ -207,6 +205,7 @@ public class LlamaContext {
      String tools = params.hasKey("tools") ? params.getString("tools") : "";
      Boolean parallelToolCalls = params.hasKey("parallel_tool_calls") ? params.getBoolean("parallel_tool_calls") : false;
      String toolChoice = params.hasKey("tool_choice") ? params.getString("tool_choice") : "";
+     Boolean enableThinking = params.hasKey("enable_thinking") ? params.getBoolean("enable_thinking") : false;
      return getFormattedChatWithJinja(
        this.context,
        messages,
@@ -214,7 +213,8 @@ public class LlamaContext {
        jsonSchema,
        tools,
        parallelToolCalls,
-       toolChoice
+       toolChoice,
+       enableThinking
      );
    }

@@ -303,12 +303,25 @@ public class LlamaContext {
        }
      }

+     int[] guide_tokens = null;
+     if (params.hasKey("guide_tokens")) {
+       ReadableArray guide_tokens_array = params.getArray("guide_tokens");
+       guide_tokens = new int[guide_tokens_array.size()];
+       for (int i = 0; i < guide_tokens_array.size(); i++) {
+         guide_tokens[i] = (int) guide_tokens_array.getDouble(i);
+       }
+     }
+
      WritableMap result = doCompletion(
        this.context,
        // String prompt,
        params.getString("prompt"),
+       // int[] guide_tokens,
+       guide_tokens,
        // int chat_format,
        params.hasKey("chat_format") ? params.getInt("chat_format") : 0,
+       // String reasoning_format,
+       params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
        // String grammar,
        params.hasKey("grammar") ? params.getString("grammar") : "",
        // String json_schema,
@@ -319,6 +332,8 @@ public class LlamaContext {
        params.hasKey("grammar_triggers") ? params.getArray("grammar_triggers") : null,
        // ReadableArray preserved_tokens,
        params.hasKey("preserved_tokens") ? params.getArray("preserved_tokens") : null,
+       // boolean thinking_forced_open,
+       params.hasKey("thinking_forced_open") ? params.getBoolean("thinking_forced_open") : false,
        // float temperature,
        params.hasKey("temperature") ? (float) params.getDouble("temperature") : 0.7f,
        // int n_threads,
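The hunks above move `reasoning_format` from context initialization to per-completion, and add `enable_thinking` to Jinja chat formatting plus `thinking_forced_open` to completion. For orientation, here is a minimal sketch of how these might be passed from JS. The key names (`enable_thinking`, `reasoning_format`) are taken from the `params.hasKey(...)` reads above; the exact TypeScript surface lives in `package/src/index.ts` and `package/src/NativeRNLlama.ts` (changed in this diff but not shown), so treat the shape and the `'deepseek'` value as assumptions — only `'none'` appears as a default here.

```ts
// Hypothetical usage sketch — parameter names mirror the Java bridge above.
const result = await context.completion({
  messages: [{ role: 'user', content: 'Why is the sky blue?' }],
  // Asks the chat template to open a thinking block (enableThinking above)
  enable_thinking: true,
  // Now parsed per completion rather than fixed at context init ('none' is the default)
  reasoning_format: 'deepseek', // illustrative value, not confirmed by this diff
  n_predict: 256,
})
console.log(result.text)
```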
@@ -423,6 +438,27 @@ public class LlamaContext {
      return result;
    }

+   public WritableArray getRerank(String query, ReadableArray documents, ReadableMap params) {
+     if (isEmbeddingEnabled(this.context) == false) {
+       throw new IllegalStateException("Embedding is not enabled but required for reranking");
+     }
+
+     // Convert ReadableArray to Java string array
+     String[] documentsArray = new String[documents.size()];
+     for (int i = 0; i < documents.size(); i++) {
+       documentsArray[i] = documents.getString(i);
+     }
+
+     WritableArray result = rerank(
+       this.context,
+       query,
+       documentsArray,
+       // int normalize,
+       params.hasKey("normalize") ? params.getInt("normalize") : -1
+     );
+     return result;
+   }
+
    public String bench(int pp, int tg, int pl, int nr) {
      return bench(this.context, pp, tg, pl, nr);
    }
@@ -487,6 +523,34 @@ public class LlamaContext {
      releaseMultimodal(this.context);
    }

+   public boolean initVocoder(String vocoderModelPath) {
+     return initVocoder(this.context, vocoderModelPath);
+   }
+
+   public boolean isVocoderEnabled() {
+     return isVocoderEnabled(this.context);
+   }
+
+   public String getFormattedAudioCompletion(String speakerJsonStr, String textToSpeak) {
+     return getFormattedAudioCompletion(this.context, speakerJsonStr, textToSpeak);
+   }
+
+   public WritableArray getAudioCompletionGuideTokens(String textToSpeak) {
+     return getAudioCompletionGuideTokens(this.context, textToSpeak);
+   }
+
+   public WritableArray decodeAudioTokens(ReadableArray tokens) {
+     int[] toks = new int[tokens.size()];
+     for (int i = 0; i < tokens.size(); i++) {
+       toks[i] = (int) tokens.getDouble(i);
+     }
+     return decodeAudioTokens(this.context, toks);
+   }
+
+   public void releaseVocoder() {
+     releaseVocoder(this.context);
+   }
+
    public void release() {
      freeContext(context);
    }
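The vocoder surface above (together with the new `cpp/rn-tts.h` and the `guide_tokens` completion parameter) is new in this release and is not covered by the README diff. A minimal text-to-speech sketch follows, assuming the JS wrappers in `package/src/index.ts` mirror these Java method names one-to-one; the result field that carries the generated audio tokens is not visible in this diff, so it is left as a placeholder.

```ts
// Hypothetical TTS flow sketch — method names mirror the Java bridge above;
// the exact JS shapes and return fields are assumptions.
const ok = await context.initVocoder('path/to/vocoder-model.gguf')
if (ok && (await context.isVocoderEnabled())) {
  // Build the TTS prompt; the first argument is an optional speaker-config JSON string
  const prompt = await context.getFormattedAudioCompletion(null, 'Hello world')

  // Guide tokens steer sampling toward the expected audio-token sequence
  const guideTokens = await context.getAudioCompletionGuideTokens('Hello world')

  // guide_tokens is the new doCompletion parameter introduced in this version
  const result = await context.completion({ prompt, guide_tokens: guideTokens, n_predict: 512 })

  // Decode the generated audio tokens into audio samples; which field on `result`
  // carries them is not shown in this diff, so this variable is a placeholder
  const audioTokenIds: number[] = [] // ← collect token ids from the completion result
  const audio = await context.decodeAudioTokens(audioTokenIds)

  await context.releaseVocoder()
}
```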
@@ -588,7 +652,6 @@ public class LlamaContext {
    protected static native long initContext(
      String model_path,
      String chat_template,
-     String reasoning_format,
      boolean embedding,
      int embd_normalize,
      int n_ctx,
@@ -625,7 +688,8 @@ public class LlamaContext {
      String jsonSchema,
      String tools,
      boolean parallelToolCalls,
-     String toolChoice
+     String toolChoice,
+     boolean enableThinking
    );
    protected static native String getFormattedChat(
      long contextPtr,
@@ -644,12 +708,15 @@ public class LlamaContext {
    protected static native WritableMap doCompletion(
      long context_ptr,
      String prompt,
+     int[] guide_tokens,
      int chat_format,
+     String reasoning_format,
      String grammar,
      String json_schema,
      boolean grammar_lazy,
      ReadableArray grammar_triggers,
      ReadableArray preserved_tokens,
+     boolean thinking_forced_open,
      float temperature,
      int n_threads,
      int n_predict,
@@ -690,6 +757,7 @@ public class LlamaContext {
      String text,
      int embd_normalize
    );
+   protected static native WritableArray rerank(long contextPtr, String query, String[] documents, int normalize);
    protected static native String bench(long contextPtr, int pp, int tg, int pl, int nr);
    protected static native int applyLoraAdapters(long contextPtr, ReadableArray loraAdapters);
    protected static native void removeLoraAdapters(long contextPtr);
@@ -698,4 +766,10 @@ public class LlamaContext {
    protected static native void setupLog(NativeLogCallback logCallback);
    protected static native void unsetLog();
    protected static native void releaseMultimodal(long contextPtr);
+   protected static native boolean isVocoderEnabled(long contextPtr);
+   protected static native String getFormattedAudioCompletion(long contextPtr, String speakerJsonStr, String textToSpeak);
+   protected static native WritableArray getAudioCompletionGuideTokens(long contextPtr, String textToSpeak);
+   protected static native WritableArray decodeAudioTokens(long contextPtr, int[] tokens);
+   protected static native boolean initVocoder(long contextPtr, String vocoderModelPath);
+   protected static native void releaseVocoder(long contextPtr);
  }