cui-llama.rn 1.7.4 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +79 -5
  4. package/android/src/main/java/com/rnllama/RNLlama.java +237 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
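Reading the list as a whole: most of the churn is a vendored llama.cpp update (per-architecture CPU kernels under ggml-cpu/arch/, the old llama-kv-cache.{h,cpp} replaced by unified/iSWA/hybrid/recurrent memory implementations, nlohmann/json relocated under nlohmann/ and bumped to v3.12, plus new chat-parser, json-partial, and regex-partial helpers). The React Native bridge itself gains two features: document reranking and TTS vocoder support (rn-tts.h plus the Java, JNI, and TypeScript additions shown in the hunks below).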
--- a/package/android/src/main/java/com/rnllama/RNLlama.java
+++ b/package/android/src/main/java/com/rnllama/RNLlama.java
@@ -16,6 +16,7 @@ import com.facebook.react.bridge.LifecycleEventListener;
 import com.facebook.react.bridge.ReadableMap;
 import com.facebook.react.bridge.ReadableArray;
 import com.facebook.react.bridge.WritableMap;
+import com.facebook.react.bridge.WritableArray;
 import com.facebook.react.bridge.Arguments;
 
 
@@ -494,6 +495,38 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "embedding-" + contextId);
   }
 
+  public void rerank(double id, final String query, final ReadableArray documents, final ReadableMap params, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
+      private Exception exception;
+
+      @Override
+      protected WritableArray doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          return context.getRerank(query, documents, params);
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(WritableArray result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "rerank-" + contextId);
+  }
+
   public void bench(double id, final double pp, final double tg, final double pl, final double nr, final Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, String>() {
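The native rerank call above resolves a WritableArray of { score, index } maps (see Java_com_rnllama_LlamaContext_rerank in the jni.cpp hunks below). A minimal usage sketch from the JavaScript side, assuming the wrapper exported from src/index.ts keeps the same names and shape as the bridge method (the JS wrapper itself is not shown in this diff):

```typescript
import { initLlama } from 'cui-llama.rn'

async function rankDocuments() {
  // Assumption: a reranker-capable GGUF model path.
  const context = await initLlama({ model: '/models/bge-reranker.gguf' })

  // The bridge resolves one { score, index } entry per input document.
  const results = await context.rerank(
    'What is the capital of France?',
    ['Paris is the capital of France.', 'Berlin is the capital of Germany.'],
    { normalize: 1 }, // assumption: mirrors the `jint normalize` JNI parameter
  )

  // Sort by score to surface the most relevant document first.
  results.sort((a, b) => b.score - a.score)
  console.log(results[0].index, results[0].score)
}
```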
@@ -759,6 +792,210 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "releaseMultimodal" + id);
   }
 
+  public void initVocoder(double id, final String vocoderModelPath, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (context.isPredicting()) {
+            throw new Exception("Context is busy");
+          }
+          return context.initVocoder(vocoderModelPath);
+        } catch (Exception e) {
+          exception = e;
+        }
+        return false;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "initVocoder-" + contextId);
+  }
+
+  public void releaseVocoder(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Void>() {
+      private Exception exception;
+
+      @Override
+      protected Void doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          context.releaseVocoder();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(Void result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(null);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "releaseVocoder-" + contextId);
+  }
+
+  public void isVocoderEnabled(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          return context.isVocoderEnabled();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return false;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "isVocoderEnabled-" + contextId);
+  }
+
+  public void getFormattedAudioCompletion(double id, final String speakerJsonStr, final String textToSpeak, Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, String>() {
+      private Exception exception;
+
+      @Override
+      protected String doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (!context.isVocoderEnabled()) {
+            throw new Exception("Vocoder is not enabled");
+          }
+          return context.getFormattedAudioCompletion(speakerJsonStr, textToSpeak);
+        } catch (Exception e) {
+          exception = e;
+          return null;
+        }
+      }
+
+      @Override
+      protected void onPostExecute(String result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "getFormattedAudioCompletion-" + contextId);
+  }
+
+  public void getAudioCompletionGuideTokens(double id, final String textToSpeak, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
+      private Exception exception;
+
+      @Override
+      protected WritableArray doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (!context.isVocoderEnabled()) {
+            throw new Exception("Vocoder is not enabled");
+          }
+          return context.getAudioCompletionGuideTokens(textToSpeak);
+        } catch (Exception e) {
+          exception = e;
+          return null;
+        }
+      }
+
+      @Override
+      protected void onPostExecute(WritableArray result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "getAudioCompletionGuideTokens-" + contextId);
+  }
+
+  public void decodeAudioTokens(double id, final ReadableArray tokens, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
+      private Exception exception;
+
+      @Override
+      protected WritableArray doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (!context.isVocoderEnabled()) {
+            throw new Exception("Vocoder is not enabled");
+          }
+          return context.decodeAudioTokens(tokens);
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(WritableArray result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "decodeAudioTokens-" + contextId);
+  }
+
   public void releaseContext(double id, Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, Void>() {
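The six methods above mirror a complete TTS pipeline. A hedged end-to-end sketch, assuming the JS wrapper exposes them under the same names (only the Java layer is visible in this diff):

```typescript
// Sketch of the TTS flow over the new vocoder bridge methods.
// All JS-side names here are assumptions based on the Java methods above.
async function speak(context: any, text: string, speakerJsonStr: string) {
  // Load a vocoder model (a WavTokenizer-style GGUF is an assumption).
  await context.initVocoder('/models/vocoder.gguf')

  // Build the TTS prompt and per-text guide tokens. The native method
  // resolves a String; treating it directly as the prompt is an assumption.
  const prompt = await context.getFormattedAudioCompletion(speakerJsonStr, text)
  const guideTokens: number[] = await context.getAudioCompletionGuideTokens(text)

  // Run completion; jni.cpp now accepts `guide_tokens` and returns
  // `audio_tokens` in the result (see the doCompletion hunks below).
  const { audio_tokens } = await context.completion({ prompt, guide_tokens: guideTokens })

  // Decode audio tokens into PCM float samples via the vocoder.
  const samples: number[] = await context.decodeAudioTokens(audio_tokens)

  await context.releaseVocoder()
  return samples
}
```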
--- a/package/android/src/main/jni.cpp
+++ b/package/android/src/main/jni.cpp
@@ -9,7 +9,7 @@
 #include <string>
 #include <thread>
 #include <unordered_map>
-#include "json.hpp"
+#include <nlohmann/json.hpp>
 #include "json-schema-to-grammar.h"
 #include "llama.h"
 #include "chat.h"
@@ -233,7 +233,6 @@ Java_com_rnllama_LlamaContext_initContext(
     jobject thiz,
     jstring model_path_str,
     jstring chat_template,
-    jstring reasoning_format,
     jboolean embedding,
     jint embd_normalize,
     jint n_ctx,
@@ -271,13 +270,6 @@ Java_com_rnllama_LlamaContext_initContext(
     const char *chat_template_chars = env->GetStringUTFChars(chat_template, nullptr);
     defaultParams.chat_template = chat_template_chars;
 
-    const char *reasoning_format_chars = env->GetStringUTFChars(reasoning_format, nullptr);
-    if (strcmp(reasoning_format_chars, "deepseek") == 0) {
-        defaultParams.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    } else {
-        defaultParams.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    }
-
     defaultParams.n_ctx = n_ctx;
     defaultParams.n_batch = n_batch;
     defaultParams.n_ubatch = n_ubatch;
@@ -346,7 +338,6 @@ Java_com_rnllama_LlamaContext_initContext(
 
     env->ReleaseStringUTFChars(model_path_str, model_path_chars);
     env->ReleaseStringUTFChars(chat_template, chat_template_chars);
-    env->ReleaseStringUTFChars(reasoning_format, reasoning_format_chars);
     env->ReleaseStringUTFChars(cache_type_k, cache_type_k_chars);
     env->ReleaseStringUTFChars(cache_type_v, cache_type_v_chars);
 
@@ -493,7 +484,8 @@ Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
     jstring json_schema,
     jstring tools,
     jboolean parallel_tool_calls,
-    jstring tool_choice
+    jstring tool_choice,
+    jboolean enable_thinking
 ) {
     UNUSED(thiz);
     auto llama = context_map[(long) context_ptr];
@@ -512,7 +504,8 @@ Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
         json_schema_chars,
         tools_chars,
         parallel_tool_calls,
-        tool_choice_chars
+        tool_choice_chars,
+        enable_thinking
     );
     putString(env, result, "prompt", formatted.prompt.c_str());
     putInt(env, result, "chat_format", static_cast<int>(formatted.format));
@@ -526,6 +519,7 @@ Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
             putInt(env, trigger_map, "token", trigger.token);
             pushMap(env, grammar_triggers, trigger_map);
         }
+        putBoolean(env, result, "thinking_forced_open", formatted.thinking_forced_open);
         putArray(env, result, "grammar_triggers", grammar_triggers);
         auto preserved_tokens = createWritableArray(env);
         for (const auto &token : formatted.preserved_tokens) {
@@ -537,7 +531,7 @@ Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
             pushString(env, additional_stops, stop.c_str());
         }
         putArray(env, result, "additional_stops", additional_stops);
-    } catch (const nlohmann::json_abi_v3_11_3::detail::parse_error& e) {
+    } catch (const nlohmann::json_abi_v3_12_0::detail::parse_error& e) {
        std::string errorMessage = "JSON parse error in getFormattedChat: " + std::string(e.what());
        putString(env, result, "_error", errorMessage.c_str());
        LOGI("[RNLlama] %s", errorMessage.c_str());
@@ -668,18 +662,33 @@ static inline jobject tokenProbsToMap(
     return result;
 }
 
+static inline jobject tokensToArray(
+    JNIEnv *env,
+    rnllama::llama_rn_context *llama,
+    std::vector<llama_token> tokens
+) {
+    auto result = createWritableArray(env);
+    for (const auto &token : tokens) {
+        pushInt(env, result, token);
+    }
+    return result;
+}
+
 JNIEXPORT jobject JNICALL
 Java_com_rnllama_LlamaContext_doCompletion(
     JNIEnv *env,
     jobject thiz,
     jlong context_ptr,
     jstring prompt,
+    jintArray guide_tokens,
     jint chat_format,
+    jstring reasoning_format,
     jstring grammar,
     jstring json_schema,
     jboolean grammar_lazy,
     jobject grammar_triggers,
     jobject preserved_tokens,
+    jboolean thinking_forced_open,
     jfloat temperature,
     jint n_threads,
     jint n_predict,
@@ -722,6 +731,18 @@ Java_com_rnllama_LlamaContext_doCompletion(
     // Set the prompt parameter
     llama->params.prompt = prompt_chars;
 
+    // Set the guide tokens parameter
+    if (guide_tokens != nullptr) {
+        int guide_tokens_size = env->GetArrayLength(guide_tokens);
+        int *guide_tokens_array = env->GetIntArrayElements(guide_tokens, nullptr);
+        std::vector<llama_token> guide_tokens_vector(guide_tokens_size);
+        for (int i = 0; i < guide_tokens_size; i++) {
+            guide_tokens_vector[i] = guide_tokens_array[i];
+        }
+        env->ReleaseIntArrayElements(guide_tokens, guide_tokens_array, 0);
+        llama->setGuideTokens(guide_tokens_vector);
+    }
+
     // Process image paths if provided
     std::vector<std::string> media_paths_vector;
 
@@ -908,6 +929,11 @@ Java_com_rnllama_LlamaContext_doCompletion(
         auto result = createWriteableMap(env);
         putString(env, result, "error", e.what());
         return reinterpret_cast<jobject>(result);
+    } catch (const std::runtime_error& e) {
+        llama->endCompletion();
+        auto result = createWriteableMap(env);
+        putString(env, result, "error", e.what());
+        return reinterpret_cast<jobject>(result);
     }
 
     if (llama->context_full) {
@@ -993,7 +1019,24 @@ Java_com_rnllama_LlamaContext_doCompletion(
     auto toolCallsSize = 0;
     if (!llama->is_interrupted) {
         try {
-            common_chat_msg message = common_chat_parse(llama->generated_text, static_cast<common_chat_format>(chat_format));
+            common_chat_syntax chat_syntax;
+            chat_syntax.format = static_cast<common_chat_format>(chat_format);
+
+            const char *reasoning_format_chars = env->GetStringUTFChars(reasoning_format, nullptr);
+            if (strcmp(reasoning_format_chars, "deepseek") == 0) {
+                chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+            } else if (strcmp(reasoning_format_chars, "deepseek-legacy") == 0) {
+                chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
+            } else {
+                chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+            }
+            chat_syntax.thinking_forced_open = thinking_forced_open;
+            env->ReleaseStringUTFChars(reasoning_format, reasoning_format_chars);
+            common_chat_msg message = common_chat_parse(
+                llama->generated_text,
+                false,
+                chat_syntax
+            );
             if (!message.reasoning_content.empty()) {
                 reasoningContent = message.reasoning_content;
             }
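Note the behavioral change in this hunk: reasoning_format is no longer fixed at context creation (it was removed from initContext above); it is now passed per completion and applied when parsing the generated text through the new common_chat_syntax / common_chat_parse API, with a new deepseek-legacy variant. On the JS side this plausibly looks like the following (names assumed):

```typescript
// Assumption: completion params now accept reasoning_format per call,
// matching the `jstring reasoning_format` parameter added to doCompletion.
async function completeWithReasoning(context: any, messages: object[]) {
  const result = await context.completion({
    messages,
    reasoning_format: 'deepseek', // or 'deepseek-legacy' / 'none', per the strcmp branches above
  })
  // Parsed-out reasoning is surfaced separately from the main text
  // (the JNI code above stores message.reasoning_content when non-empty).
  return result.reasoning_content
}
```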
@@ -1027,6 +1070,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
     if (toolCallsSize > 0) {
         putArray(env, result, "tool_calls", toolCalls);
     }
+    putArray(env, result, "audio_tokens", tokensToArray(env, llama, llama->audio_tokens));
     putArray(env, result, "completion_probabilities", tokenProbsToMap(env, llama, llama->generated_token_probs));
     putInt(env, result, "tokens_predicted", llama->num_tokens_predicted);
     putInt(env, result, "tokens_evaluated", llama->num_prompt_tokens);
@@ -1189,6 +1233,9 @@ Java_com_rnllama_LlamaContext_embedding(
     } catch (const std::exception &e) {
         putString(env, result, "error", e.what());
         return reinterpret_cast<jobject>(result);
+    } catch (const std::runtime_error& e) {
+        putString(env, result, "error", e.what());
+        return reinterpret_cast<jobject>(result);
     }
     llama->doCompletion();
 
@@ -1210,6 +1257,54 @@ Java_com_rnllama_LlamaContext_embedding(
     return result;
 }
 
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_rerank(
+    JNIEnv *env, jobject thiz,
+    jlong context_ptr,
+    jstring query,
+    jobjectArray documents,
+    jint normalize
+) {
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+
+    const char *query_chars = env->GetStringUTFChars(query, nullptr);
+
+    // Convert Java string array to C++ vector
+    std::vector<std::string> documents_vector;
+    int documents_size = env->GetArrayLength(documents);
+    for (int i = 0; i < documents_size; i++) {
+        jstring document = (jstring) env->GetObjectArrayElement(documents, i);
+        const char *document_chars = env->GetStringUTFChars(document, nullptr);
+        documents_vector.push_back(document_chars);
+        env->ReleaseStringUTFChars(document, document_chars);
+    }
+
+    auto result = createWritableArray(env);
+
+    try {
+        std::vector<float> scores = llama->rerank(query_chars, documents_vector);
+
+        for (size_t i = 0; i < scores.size(); i++) {
+            auto item = createWriteableMap(env);
+            putDouble(env, item, "score", (double) scores[i]);
+            putInt(env, item, "index", (int) i);
+            pushMap(env, result, item);
+        }
+    } catch (const std::exception &e) {
+        auto error_item = createWriteableMap(env);
+        putString(env, error_item, "error", e.what());
+        pushMap(env, result, error_item);
+    } catch (const std::runtime_error& e) {
+        auto error_item = createWriteableMap(env);
+        putString(env, error_item, "error", e.what());
+        pushMap(env, result, error_item);
+    }
+
+    env->ReleaseStringUTFChars(query, query_chars);
+    return result;
+}
+
 JNIEXPORT jstring JNICALL
 Java_com_rnllama_LlamaContext_bench(
     JNIEnv *env,
@@ -1415,4 +1510,108 @@ Java_com_rnllama_LlamaContext_releaseMultimodal(
     llama->releaseMultimodal();
 }
 
+JNIEXPORT jboolean JNICALL
+Java_com_rnllama_LlamaContext_initVocoder(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jstring vocoder_model_path
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    const char *vocoder_model_path_chars = env->GetStringUTFChars(vocoder_model_path, nullptr);
+    bool result = llama->initVocoder(vocoder_model_path_chars);
+    env->ReleaseStringUTFChars(vocoder_model_path, vocoder_model_path_chars);
+    return result;
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnllama_LlamaContext_releaseVocoder(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    llama->releaseVocoder();
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_rnllama_LlamaContext_isVocoderEnabled(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    return llama->isVocoderEnabled();
+}
+
+JNIEXPORT jstring JNICALL
+Java_com_rnllama_LlamaContext_getFormattedAudioCompletion(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jstring speaker_json_str,
+    jstring text_to_speak
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    const char *speaker_json_str_chars = env->GetStringUTFChars(speaker_json_str, nullptr);
+    const char *text_to_speak_chars = env->GetStringUTFChars(text_to_speak, nullptr);
+    std::string result = llama->getFormattedAudioCompletion(speaker_json_str_chars, text_to_speak_chars);
+    env->ReleaseStringUTFChars(speaker_json_str, speaker_json_str_chars);
+    env->ReleaseStringUTFChars(text_to_speak, text_to_speak_chars);
+    return env->NewStringUTF(result.c_str());
+}
+
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_getAudioCompletionGuideTokens(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jstring text_to_speak
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    const char *text_to_speak_chars = env->GetStringUTFChars(text_to_speak, nullptr);
+    std::vector<llama_token> guide_tokens = llama->getAudioCompletionGuideTokens(text_to_speak_chars);
+    env->ReleaseStringUTFChars(text_to_speak, text_to_speak_chars);
+    auto result = createWritableArray(env);
+    for (const auto &val : guide_tokens) {
+        pushInt(env, result, (int) val);
+    }
+    return result;
+}
+
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_decodeAudioTokens(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jintArray tokens
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    jsize tokens_size = env->GetArrayLength(tokens);
+    jint *tokens_ptr = env->GetIntArrayElements(tokens, nullptr);
+    std::vector<llama_token> tokens_vec(tokens_size);
+    for (int i = 0; i < tokens_size; i++) {
+        tokens_vec[i] = tokens_ptr[i];
+    }
+    env->ReleaseIntArrayElements(tokens, tokens_ptr, 0);
+    std::vector<float> audio = llama->decodeAudioTokens(tokens_vec);
+    auto result = createWritableArray(env);
+    for (const auto &val : audio) {
+        pushDouble(env, result, (double) val);
+    }
+    return result;
+}
+
 } // extern "C"
--- a/package/android/src/newarch/java/com/rnllama/RNLlamaModule.java
+++ b/package/android/src/newarch/java/com/rnllama/RNLlamaModule.java
@@ -123,6 +123,11 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
     rnllama.embedding(id, text, params, promise);
   }
 
+  @ReactMethod
+  public void rerank(double id, final String query, final ReadableArray documents, final ReadableMap params, final Promise promise) {
+    rnllama.rerank(id, query, documents, params, promise);
+  }
+
   @ReactMethod
   public void bench(double id, final double pp, final double tg, final double pl, final double nr, final Promise promise) {
     rnllama.bench(id, pp, tg, pl, nr, promise);
@@ -143,6 +148,36 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
     rnllama.getLoadedLoraAdapters(id, promise);
   }
 
+  @ReactMethod
+  public void initVocoder(double id, final String vocoderModelPath, final Promise promise) {
+    rnllama.initVocoder(id, vocoderModelPath, promise);
+  }
+
+  @ReactMethod
+  public void isVocoderEnabled(double id, final Promise promise) {
+    rnllama.isVocoderEnabled(id, promise);
+  }
+
+  @ReactMethod
+  public void getFormattedAudioCompletion(double id, final String speakerJsonStr, final String textToSpeak, final Promise promise) {
+    rnllama.getFormattedAudioCompletion(id, speakerJsonStr, textToSpeak, promise);
+  }
+
+  @ReactMethod
+  public void getAudioCompletionGuideTokens(double id, final String textToSpeak, final Promise promise) {
+    rnllama.getAudioCompletionGuideTokens(id, textToSpeak, promise);
+  }
+
+  @ReactMethod
+  public void decodeAudioTokens(double id, final ReadableArray tokens, final Promise promise) {
+    rnllama.decodeAudioTokens(id, tokens, promise);
+  }
+
+  @ReactMethod
+  public void releaseVocoder(double id, final Promise promise) {
+    rnllama.releaseVocoder(id, promise);
+  }
+
   @ReactMethod
   public void releaseContext(double id, Promise promise) {
     rnllama.releaseContext(id, promise);