cui-llama.rn 1.7.3 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
  4. package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
@@ -0,0 +1,187 @@
1
+ // __ _____ _____ _____
2
+ // __| | __| | | | JSON for Modern C++
3
+ // | | |__ | | | | | | version 3.12.0
4
+ // |_____|_____|_____|_|___| https://github.com/nlohmann/json
5
+ //
6
+ // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann <https://nlohmann.me>
7
+ // SPDX-License-Identifier: MIT
8
+
9
+ #ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_
10
+ #define INCLUDE_NLOHMANN_JSON_FWD_HPP_
11
+
12
+ #include <cstdint> // int64_t, uint64_t
13
+ #include <map> // map
14
+ #include <memory> // allocator
15
+ #include <string> // string
16
+ #include <vector> // vector
17
+
18
+ // #include <nlohmann/detail/abi_macros.hpp>
19
+ // __ _____ _____ _____
20
+ // __| | __| | | | JSON for Modern C++
21
+ // | | |__ | | | | | | version 3.12.0
22
+ // |_____|_____|_____|_|___| https://github.com/nlohmann/json
23
+ //
24
+ // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann <https://nlohmann.me>
25
+ // SPDX-License-Identifier: MIT
26
+
27
+
28
+
29
+ // This file contains all macro definitions affecting or depending on the ABI
30
+
31
+ #ifndef JSON_SKIP_LIBRARY_VERSION_CHECK
32
+ #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH)
33
+ #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 12 || NLOHMANN_JSON_VERSION_PATCH != 0
34
+ #warning "Already included a different version of the library!"
35
+ #endif
36
+ #endif
37
+ #endif
38
+
39
+ #define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum)
40
+ #define NLOHMANN_JSON_VERSION_MINOR 12 // NOLINT(modernize-macro-to-enum)
41
+ #define NLOHMANN_JSON_VERSION_PATCH 0 // NOLINT(modernize-macro-to-enum)
42
+
43
+ #ifndef JSON_DIAGNOSTICS
44
+ #define JSON_DIAGNOSTICS 0
45
+ #endif
46
+
47
+ #ifndef JSON_DIAGNOSTIC_POSITIONS
48
+ #define JSON_DIAGNOSTIC_POSITIONS 0
49
+ #endif
50
+
51
+ #ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
52
+ #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0
53
+ #endif
54
+
55
+ #if JSON_DIAGNOSTICS
56
+ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag
57
+ #else
58
+ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS
59
+ #endif
60
+
61
+ #if JSON_DIAGNOSTIC_POSITIONS
62
+ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp
63
+ #else
64
+ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS
65
+ #endif
66
+
67
+ #if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
68
+ #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp
69
+ #else
70
+ #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON
71
+ #endif
72
+
73
+ #ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION
74
+ #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0
75
+ #endif
76
+
77
+ // Construct the namespace ABI tags component
78
+ #define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c
79
+ #define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \
80
+ NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c)
81
+
82
+ #define NLOHMANN_JSON_ABI_TAGS \
83
+ NLOHMANN_JSON_ABI_TAGS_CONCAT( \
84
+ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \
85
+ NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \
86
+ NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS)
87
+
88
+ // Construct the namespace version component
89
+ #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \
90
+ _v ## major ## _ ## minor ## _ ## patch
91
+ #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \
92
+ NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch)
93
+
94
+ #if NLOHMANN_JSON_NAMESPACE_NO_VERSION
95
+ #define NLOHMANN_JSON_NAMESPACE_VERSION
96
+ #else
97
+ #define NLOHMANN_JSON_NAMESPACE_VERSION \
98
+ NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \
99
+ NLOHMANN_JSON_VERSION_MINOR, \
100
+ NLOHMANN_JSON_VERSION_PATCH)
101
+ #endif
102
+
103
+ // Combine namespace components
104
+ #define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b
105
+ #define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \
106
+ NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b)
107
+
108
+ #ifndef NLOHMANN_JSON_NAMESPACE
109
+ #define NLOHMANN_JSON_NAMESPACE \
110
+ nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \
111
+ NLOHMANN_JSON_ABI_TAGS, \
112
+ NLOHMANN_JSON_NAMESPACE_VERSION)
113
+ #endif
114
+
115
+ #ifndef NLOHMANN_JSON_NAMESPACE_BEGIN
116
+ #define NLOHMANN_JSON_NAMESPACE_BEGIN \
117
+ namespace nlohmann \
118
+ { \
119
+ inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \
120
+ NLOHMANN_JSON_ABI_TAGS, \
121
+ NLOHMANN_JSON_NAMESPACE_VERSION) \
122
+ {
123
+ #endif
124
+
125
+ #ifndef NLOHMANN_JSON_NAMESPACE_END
126
+ #define NLOHMANN_JSON_NAMESPACE_END \
127
+ } /* namespace (inline namespace) NOLINT(readability/namespace) */ \
128
+ } // namespace nlohmann
129
+ #endif
130
+
131
+
132
+ /*!
133
+ @brief namespace for Niels Lohmann
134
+ @see https://github.com/nlohmann
135
+ @since version 1.0.0
136
+ */
137
+ NLOHMANN_JSON_NAMESPACE_BEGIN
138
+
139
+ /*!
140
+ @brief default JSONSerializer template argument
141
+
142
+ This serializer ignores the template arguments and uses ADL
143
+ ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl))
144
+ for serialization.
145
+ */
146
+ template<typename T = void, typename SFINAE = void>
147
+ struct adl_serializer;
148
+
149
+ /// a class to store JSON values
150
+ /// @sa https://json.nlohmann.me/api/basic_json/
151
+ template<template<typename U, typename V, typename... Args> class ObjectType =
152
+ std::map,
153
+ template<typename U, typename... Args> class ArrayType = std::vector,
154
+ class StringType = std::string, class BooleanType = bool,
155
+ class NumberIntegerType = std::int64_t,
156
+ class NumberUnsignedType = std::uint64_t,
157
+ class NumberFloatType = double,
158
+ template<typename U> class AllocatorType = std::allocator,
159
+ template<typename T, typename SFINAE = void> class JSONSerializer =
160
+ adl_serializer,
161
+ class BinaryType = std::vector<std::uint8_t>, // cppcheck-suppress syntaxError
162
+ class CustomBaseClass = void>
163
+ class basic_json;
164
+
165
+ /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document
166
+ /// @sa https://json.nlohmann.me/api/json_pointer/
167
+ template<typename RefStringType>
168
+ class json_pointer;
169
+
170
+ /*!
171
+ @brief default specialization
172
+ @sa https://json.nlohmann.me/api/json/
173
+ */
174
+ using json = basic_json<>;
175
+
176
+ /// @brief a minimal map-like container that preserves insertion order
177
+ /// @sa https://json.nlohmann.me/api/ordered_map/
178
+ template<class Key, class T, class IgnoredLess, class Allocator>
179
+ struct ordered_map;
180
+
181
+ /// @brief specialization that maintains the insertion order of object keys
182
+ /// @sa https://json.nlohmann.me/api/ordered_json/
183
+ using ordered_json = basic_json<nlohmann::ordered_map>;
184
+
185
+ NLOHMANN_JSON_NAMESPACE_END
186
+
187
+ #endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_
@@ -0,0 +1,56 @@
1
+ #pragma once
2
+
3
+ #include <regex>
4
+ #include <string>
5
+
6
+ enum common_regex_match_type {
7
+ COMMON_REGEX_MATCH_TYPE_NONE,
8
+ COMMON_REGEX_MATCH_TYPE_PARTIAL,
9
+ COMMON_REGEX_MATCH_TYPE_FULL,
10
+ };
11
+
12
+ struct common_string_range {
13
+ size_t begin;
14
+ size_t end;
15
+ common_string_range(size_t begin, size_t end) : begin(begin), end(end) {
16
+ if (begin > end) {
17
+ throw std::runtime_error("Invalid range");
18
+ }
19
+ }
20
+ // prevent default ctor
21
+ common_string_range() = delete;
22
+ bool empty() const {
23
+ return begin == end;
24
+ }
25
+ bool operator==(const common_string_range & other) const {
26
+ return begin == other.begin && end == other.end;
27
+ }
28
+ };
29
+
30
+ struct common_regex_match {
31
+ common_regex_match_type type = COMMON_REGEX_MATCH_TYPE_NONE;
32
+ std::vector<common_string_range> groups;
33
+
34
+ bool operator==(const common_regex_match & other) const {
35
+ return type == other.type && groups == other.groups;
36
+ }
37
+ bool operator!=(const common_regex_match & other) const {
38
+ return !(*this == other);
39
+ }
40
+ };
41
+
42
+ class common_regex {
43
+ std::string pattern;
44
+ std::regex rx;
45
+ std::regex rx_reversed_partial;
46
+
47
+ public:
48
+ explicit common_regex(const std::string & pattern);
49
+
50
+ common_regex_match search(const std::string & input, size_t pos, bool as_match = false) const;
51
+
52
+ const std::string & str() const { return pattern; }
53
+ };
54
+
55
+ // For testing only (pretty print of failures).
56
+ std::string regex_to_reversed_partial_regex(const std::string & pattern);
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include <sstream>
5
5
  #include <iostream>
6
+ #include <thread>
6
7
  #include "chat.h"
7
8
  #include "common.h"
8
9
  #include "ggml.h"
@@ -10,10 +11,13 @@
10
11
  #include "llama.h"
11
12
  #include "llama-impl.h"
12
13
  #include "sampling.h"
14
+ #include "nlohmann/json.hpp"
13
15
  #if defined(__ANDROID__)
14
16
  #include <android/log.h>
15
17
  #endif
16
18
 
19
+ using json = nlohmann::ordered_json;
20
+
17
21
  namespace rnllama {
18
22
 
19
23
  std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token);
@@ -43,6 +47,8 @@ struct completion_token_output
43
47
 
44
48
  struct llama_rn_context_mtmd;
45
49
 
50
+ struct llama_rn_context_vocoder;
51
+
46
52
  struct llama_rn_tokenize_result {
47
53
  std::vector<llama_token> tokens;
48
54
  bool has_media = false;
@@ -51,6 +57,12 @@ struct llama_rn_tokenize_result {
51
57
  std::vector<size_t> chunk_pos_media; // media only
52
58
  };
53
59
 
60
+ enum tts_type {
61
+ UNKNOWN = -1,
62
+ OUTETTS_V0_2 = 1,
63
+ OUTETTS_V0_3 = 2,
64
+ };
65
+
54
66
  // Main context class
55
67
  struct llama_rn_context {
56
68
  bool is_predicting = false;
@@ -58,6 +70,7 @@ struct llama_rn_context {
58
70
  bool has_next_token = false;
59
71
  std::string generated_text;
60
72
  std::vector<completion_token_output> generated_token_probs;
73
+ std::vector<llama_token> audio_tokens;
61
74
 
62
75
  size_t num_prompt_tokens = 0;
63
76
  size_t num_tokens_predicted = 0;
@@ -69,6 +82,9 @@ struct llama_rn_context {
69
82
  common_params params;
70
83
  common_init_result llama_init;
71
84
 
85
+ bool next_token_uses_guide_token = true;
86
+ std::vector<llama_token> guide_tokens;
87
+
72
88
  llama_model *model = nullptr;
73
89
  float loading_progress = 0;
74
90
  bool is_load_interrupted = false;
@@ -92,6 +108,9 @@ struct llama_rn_context {
92
108
  llama_rn_context_mtmd *mtmd_wrapper = nullptr;
93
109
  bool has_multimodal = false;
94
110
 
111
+ llama_rn_context_vocoder *vocoder_wrapper = nullptr;
112
+ bool has_vocoder = false;
113
+
95
114
  ~llama_rn_context();
96
115
 
97
116
  void rewind();
@@ -104,7 +123,8 @@ struct llama_rn_context {
104
123
  const std::string &json_schema,
105
124
  const std::string &tools,
106
125
  const bool &parallel_tool_calls,
107
- const std::string &tool_choice
126
+ const std::string &tool_choice,
127
+ const bool &enable_thinking
108
128
  ) const;
109
129
  std::string getFormattedChat(
110
130
  const std::string &messages,
@@ -112,12 +132,14 @@ struct llama_rn_context {
112
132
  ) const;
113
133
  void truncatePrompt(std::vector<llama_token> &prompt_tokens);
114
134
  void loadPrompt(const std::vector<std::string> &media_paths);
135
+ void setGuideTokens(const std::vector<llama_token> &tokens);
115
136
  void beginCompletion();
116
137
  void endCompletion();
117
138
  completion_token_output nextToken();
118
139
  size_t findStoppingStrings(const std::string &text, const size_t last_token_size, const stop_type type);
119
140
  completion_token_output doCompletion();
120
141
  std::vector<float> getEmbedding(common_params &embd_params);
142
+ std::vector<float> rerank(const std::string &query, const std::vector<std::string> &documents);
121
143
  std::string bench(int pp, int tg, int pl, int nr);
122
144
  int applyLoraAdapters(std::vector<common_adapter_lora_info> lora);
123
145
  void removeLoraAdapters();
@@ -137,6 +159,15 @@ struct llama_rn_context {
137
159
  );
138
160
 
139
161
  llama_rn_tokenize_result tokenize(const std::string &text, const std::vector<std::string> &media_paths);
162
+
163
+ // Vocoder methods
164
+ bool initVocoder(const std::string &vocoder_model_path);
165
+ tts_type getTTSType(json speaker = nullptr);
166
+ std::string getFormattedAudioCompletion(const std::string &speaker_json_str, const std::string &text_to_speak);
167
+ std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
168
+ std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
169
+ bool isVocoderEnabled() const;
170
+ void releaseVocoder();
140
171
  };
141
172
 
142
173
  // Logging macros
@@ -0,0 +1,39 @@
1
+
2
+
3
+ namespace rnllama {
4
+
5
+ // the default speaker profile is from: https://github.com/edwko/OuteTTS/blob/main/outetts/version/v1/default_speakers/en_male_1.json
6
+ static const std::string default_audio_text = "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>";
7
+ static const std::string default_audio_data = R"(<|audio_start|>
8
+ the<|t_0.08|><|code_start|><|257|><|740|><|636|><|913|><|788|><|1703|><|code_end|>
9
+ overall<|t_0.36|><|code_start|><|127|><|201|><|191|><|774|><|700|><|532|><|1056|><|557|><|798|><|298|><|1741|><|747|><|1662|><|1617|><|1702|><|1527|><|368|><|1588|><|1049|><|1008|><|1625|><|747|><|1576|><|728|><|1019|><|1696|><|1765|><|code_end|>
10
+ package<|t_0.56|><|code_start|><|935|><|584|><|1319|><|627|><|1016|><|1491|><|1344|><|1117|><|1526|><|1040|><|239|><|1435|><|951|><|498|><|723|><|1180|><|535|><|789|><|1649|><|1637|><|78|><|465|><|1668|><|901|><|595|><|1675|><|117|><|1009|><|1667|><|320|><|840|><|79|><|507|><|1762|><|1508|><|1228|><|1768|><|802|><|1450|><|1457|><|232|><|639|><|code_end|>
11
+ from<|t_0.19|><|code_start|><|604|><|782|><|1682|><|872|><|1532|><|1600|><|1036|><|1761|><|647|><|1554|><|1371|><|653|><|1595|><|950|><|code_end|>
12
+ just<|t_0.25|><|code_start|><|1782|><|1670|><|317|><|786|><|1748|><|631|><|599|><|1155|><|1364|><|1524|><|36|><|1591|><|889|><|1535|><|541|><|440|><|1532|><|50|><|870|><|code_end|>
13
+ two<|t_0.24|><|code_start|><|1681|><|1510|><|673|><|799|><|805|><|1342|><|330|><|519|><|62|><|640|><|1138|><|565|><|1552|><|1497|><|1552|><|572|><|1715|><|1732|><|code_end|>
14
+ people<|t_0.39|><|code_start|><|593|><|274|><|136|><|740|><|691|><|633|><|1484|><|1061|><|1138|><|1485|><|344|><|428|><|397|><|1562|><|645|><|917|><|1035|><|1449|><|1669|><|487|><|442|><|1484|><|1329|><|1832|><|1704|><|600|><|761|><|653|><|269|><|code_end|>
15
+ is<|t_0.16|><|code_start|><|566|><|583|><|1755|><|646|><|1337|><|709|><|802|><|1008|><|485|><|1583|><|652|><|10|><|code_end|>
16
+ pretty<|t_0.32|><|code_start|><|1818|><|1747|><|692|><|733|><|1010|><|534|><|406|><|1697|><|1053|><|1521|><|1355|><|1274|><|816|><|1398|><|211|><|1218|><|817|><|1472|><|1703|><|686|><|13|><|822|><|445|><|1068|><|code_end|>
17
+ remarkable<|t_0.68|><|code_start|><|230|><|1048|><|1705|><|355|><|706|><|1149|><|1535|><|1787|><|1356|><|1396|><|835|><|1583|><|486|><|1249|><|286|><|937|><|1076|><|1150|><|614|><|42|><|1058|><|705|><|681|><|798|><|934|><|490|><|514|><|1399|><|572|><|1446|><|1703|><|1346|><|1040|><|1426|><|1304|><|664|><|171|><|1530|><|625|><|64|><|1708|><|1830|><|1030|><|443|><|1509|><|1063|><|1605|><|1785|><|721|><|1440|><|923|><|code_end|>
18
+ sure<|t_0.36|><|code_start|><|792|><|1780|><|923|><|1640|><|265|><|261|><|1525|><|567|><|1491|><|1250|><|1730|><|362|><|919|><|1766|><|543|><|1|><|333|><|113|><|970|><|252|><|1606|><|133|><|302|><|1810|><|1046|><|1190|><|1675|><|code_end|>
19
+ i<|t_0.08|><|code_start|><|123|><|439|><|1074|><|705|><|1799|><|637|><|code_end|>
20
+ have<|t_0.16|><|code_start|><|1509|><|599|><|518|><|1170|><|552|><|1029|><|1267|><|864|><|419|><|143|><|1061|><|0|><|code_end|>
21
+ some<|t_0.16|><|code_start|><|619|><|400|><|1270|><|62|><|1370|><|1832|><|917|><|1661|><|167|><|269|><|1366|><|1508|><|code_end|>
22
+ critiques<|t_0.60|><|code_start|><|559|><|584|><|1163|><|1129|><|1313|><|1728|><|721|><|1146|><|1093|><|577|><|928|><|27|><|630|><|1080|><|1346|><|1337|><|320|><|1382|><|1175|><|1682|><|1556|><|990|><|1683|><|860|><|1721|><|110|><|786|><|376|><|1085|><|756|><|1523|><|234|><|1334|><|1506|><|1578|><|659|><|612|><|1108|><|1466|><|1647|><|308|><|1470|><|746|><|556|><|1061|><|code_end|>
23
+ about<|t_0.29|><|code_start|><|26|><|1649|><|545|><|1367|><|1263|><|1728|><|450|><|859|><|1434|><|497|><|1220|><|1285|><|179|><|755|><|1154|><|779|><|179|><|1229|><|1213|><|922|><|1774|><|1408|><|code_end|>
24
+ some<|t_0.23|><|code_start|><|986|><|28|><|1649|><|778|><|858|><|1519|><|1|><|18|><|26|><|1042|><|1174|><|1309|><|1499|><|1712|><|1692|><|1516|><|1574|><|code_end|>
25
+ of<|t_0.07|><|code_start|><|197|><|716|><|1039|><|1662|><|64|><|code_end|>
26
+ the<|t_0.08|><|code_start|><|1811|><|1568|><|569|><|886|><|1025|><|1374|><|code_end|>
27
+ gameplay<|t_0.48|><|code_start|><|1269|><|1092|><|933|><|1362|><|1762|><|1700|><|1675|><|215|><|781|><|1086|><|461|><|838|><|1022|><|759|><|649|><|1416|><|1004|><|551|><|909|><|787|><|343|><|830|><|1391|><|1040|><|1622|><|1779|><|1360|><|1231|><|1187|><|1317|><|76|><|997|><|989|><|978|><|737|><|189|><|code_end|>
28
+ aspects<|t_0.56|><|code_start|><|1423|><|797|><|1316|><|1222|><|147|><|719|><|1347|><|386|><|1390|><|1558|><|154|><|440|><|634|><|592|><|1097|><|1718|><|712|><|763|><|1118|><|1721|><|1311|><|868|><|580|><|362|><|1435|><|868|><|247|><|221|><|886|><|1145|><|1274|><|1284|><|457|><|1043|><|1459|><|1818|><|62|><|599|><|1035|><|62|><|1649|><|778|><|code_end|>
29
+ but<|t_0.20|><|code_start|><|780|><|1825|><|1681|><|1007|><|861|><|710|><|702|><|939|><|1669|><|1491|><|613|><|1739|><|823|><|1469|><|648|><|code_end|>
30
+ its<|t_0.09|><|code_start|><|92|><|688|><|1623|><|962|><|1670|><|527|><|599|><|code_end|>
31
+ still<|t_0.27|><|code_start|><|636|><|10|><|1217|><|344|><|713|><|957|><|823|><|154|><|1649|><|1286|><|508|><|214|><|1760|><|1250|><|456|><|1352|><|1368|><|921|><|615|><|5|><|code_end|>
32
+ really<|t_0.36|><|code_start|><|55|><|420|><|1008|><|1659|><|27|><|644|><|1266|><|617|><|761|><|1712|><|109|><|1465|><|1587|><|503|><|1541|><|619|><|197|><|1019|><|817|><|269|><|377|><|362|><|1381|><|507|><|1488|><|4|><|1695|><|code_end|>
33
+ enjoyable<|t_0.49|><|code_start|><|678|><|501|><|864|><|319|><|288|><|1472|><|1341|><|686|><|562|><|1463|><|619|><|1563|><|471|><|911|><|730|><|1811|><|1006|><|520|><|861|><|1274|><|125|><|1431|><|638|><|621|><|153|><|876|><|1770|><|437|><|987|><|1653|><|1109|><|898|><|1285|><|80|><|593|><|1709|><|843|><|code_end|>
34
+ and<|t_0.15|><|code_start|><|1285|><|987|><|303|><|1037|><|730|><|1164|><|502|><|120|><|1737|><|1655|><|1318|><|code_end|>
35
+ it<|t_0.09|><|code_start|><|848|><|1366|><|395|><|1601|><|1513|><|593|><|1302|><|code_end|>
36
+ looks<|t_0.27|><|code_start|><|1281|><|1266|><|1755|><|572|><|248|><|1751|><|1257|><|695|><|1380|><|457|><|659|><|585|><|1315|><|1105|><|1776|><|736|><|24|><|736|><|654|><|1027|><|code_end|>
37
+ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|1481|><|1721|><|1123|><|438|><|1246|><|1251|><|795|><|659|><|1381|><|1658|><|217|><|1772|><|562|><|952|><|107|><|1129|><|1112|><|467|><|550|><|1079|><|840|><|1615|><|1469|><|1380|><|168|><|917|><|836|><|1827|><|437|><|583|><|67|><|595|><|1087|><|1646|><|1493|><|1677|><|code_end|>)";
38
+
39
+ }
package/jest/mock.js CHANGED
@@ -4,6 +4,7 @@ if (!NativeModules.RNLlama) {
4
4
  const demoEmbedding = new Array(768).fill(0.01)
5
5
 
6
6
  const contextMap = {}
7
+ const vocoderMap = {}
7
8
  NativeModules.RNLlama = {
8
9
  setContextLimit: jest.fn(),
9
10
 
@@ -53,6 +54,9 @@ if (!NativeModules.RNLlama) {
53
54
 
54
55
  completion: jest.fn(async (contextId, jobId) => {
55
56
  const testResult = {
57
+ audio_tokens: [
58
+ 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010,
59
+ ],
56
60
  text: '*giggles*',
57
61
  completion_probabilities: [
58
62
  {
@@ -192,6 +196,7 @@ if (!NativeModules.RNLlama) {
192
196
  })),
193
197
  detokenize: jest.fn(async () => ''),
194
198
  embedding: jest.fn(async () => ({ embedding: demoEmbedding })),
199
+ rerank: jest.fn(async () => []),
195
200
 
196
201
  loadSession: jest.fn(async () => ({
197
202
  tokens_loaded: 1,
@@ -223,6 +228,25 @@ if (!NativeModules.RNLlama) {
223
228
  releaseMultimodal: jest.fn(async (id) => {
224
229
  delete contextMap[id]
225
230
  }),
231
+
232
+ initVocoder: jest.fn(async (id) => {
233
+ vocoderMap[id] = true
234
+ return true
235
+ }),
236
+ releaseVocoder: jest.fn(async (id) => {
237
+ delete vocoderMap[id]
238
+ }),
239
+ isVocoderEnabled: jest.fn(async (id) => vocoderMap[id] || false),
240
+ getFormattedAudioCompletion: jest.fn(
241
+ async (id, speakerJsonStr, textToSpeak) =>
242
+ `${speakerJsonStr || '<default speaker>'}<sep>${textToSpeak}`,
243
+ ),
244
+ getAudioCompletionGuideTokens: jest.fn(async (id, textToSpeak) =>
245
+ textToSpeak.split('').map((char) => char.charCodeAt(0) + 1000),
246
+ ),
247
+ decodeAudioTokens: jest.fn(async (id, tokens) =>
248
+ tokens.map((token) => token - 1000).map((token) => token / 1024),
249
+ ),
226
250
  }
227
251
  }
228
252
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cui-llama.rn",
3
- "version": "1.7.3",
3
+ "version": "1.7.6",
4
4
  "description": "Fork of llama.rn for ChatterUI",
5
5
  "main": "lib/commonjs/index",
6
6
  "module": "lib/module/index",
@@ -12,8 +12,6 @@ export type NativeContextParams = {
12
12
  */
13
13
  chat_template?: string
14
14
 
15
- reasoning_format?: string
16
-
17
15
  is_model_asset?: boolean
18
16
  use_progress_callback?: boolean
19
17
 
@@ -81,6 +79,10 @@ export type NativeContextParams = {
81
79
  export type NativeCompletionParams = {
82
80
  prompt: string
83
81
  n_threads?: number
82
+ /**
83
+ * Enable Jinja. Default: true if supported by the model
84
+ */
85
+ jinja?: boolean
84
86
  /**
85
87
  * JSON schema to convert to a grammar for structured JSON output.
86
88
  * It will be overridden by grammar if both are set.
@@ -94,6 +96,14 @@ export type NativeCompletionParams = {
94
96
  * Lazy grammar sampling, triggered by grammar_triggers. Default: false
95
97
  */
96
98
  grammar_lazy?: boolean
99
+ /**
100
+ * Enable thinking if jinja is enabled. Default: true
101
+ */
102
+ enable_thinking?: boolean
103
+ /**
104
+ * Force thinking to be open. Default: false
105
+ */
106
+ thinking_forced_open?: boolean
97
107
  /**
98
108
  * Lazy grammar triggers. Default: []
99
109
  */
@@ -104,6 +114,7 @@ export type NativeCompletionParams = {
104
114
  }>
105
115
  preserved_tokens?: Array<string>
106
116
  chat_format?: number
117
+ reasoning_format?: string
107
118
  /**
108
119
  * Path to an image file to process before generating text.
109
120
  * When provided, the image will be processed and added to the context.
@@ -225,6 +236,13 @@ export type NativeCompletionParams = {
225
236
  */
226
237
  seed?: number
227
238
 
239
+ /**
240
+ * Guide tokens for the completion.
241
+ * Helps prevent hallucinations by forcing the TTS to use the correct words.
242
+ * Default: `[]`
243
+ */
244
+ guide_tokens?: Array<number>
245
+
228
246
  emit_partial_completion: boolean
229
247
  }
230
248
 
@@ -285,6 +303,7 @@ export type NativeCompletionResult = {
285
303
  timings: NativeCompletionResultTimings
286
304
 
287
305
  completion_probabilities?: Array<NativeCompletionTokenProb>
306
+ audio_tokens?: Array<number>
288
307
  }
289
308
 
290
309
  export type NativeTokenizeResult = {
@@ -390,6 +409,7 @@ export type JinjaFormattedChatResult = FormattedChatResult & {
390
409
  value: string
391
410
  token: number
392
411
  }>
412
+ thinking_forced_open?: boolean
393
413
  preserved_tokens?: Array<string>
394
414
  additional_stops?: Array<string>
395
415
  }
@@ -400,6 +420,15 @@ export type NativeImageProcessingResult = {
400
420
  error?: string
401
421
  }
402
422
 
423
+ export type NativeRerankParams = {
424
+ normalize?: number
425
+ }
426
+
427
+ export type NativeRerankResult = {
428
+ score: number
429
+ index: number
430
+ }
431
+
403
432
  export interface Spec extends TurboModule {
404
433
  toggleNativeLog(enabled: boolean): Promise<void>
405
434
  setContextLimit(limit: number): Promise<void>
@@ -420,6 +449,7 @@ export interface Spec extends TurboModule {
420
449
  tools?: string
421
450
  parallel_tool_calls?: string
422
451
  tool_choice?: string
452
+ enable_thinking?: boolean
423
453
  },
424
454
  ): Promise<JinjaFormattedChatResult | string>
425
455
  loadSession(
@@ -445,6 +475,12 @@ export interface Spec extends TurboModule {
445
475
  text: string,
446
476
  params: NativeEmbeddingParams,
447
477
  ): Promise<NativeEmbeddingResult>
478
+ rerank(
479
+ contextId: number,
480
+ query: string,
481
+ documents: Array<string>,
482
+ params?: NativeRerankParams,
483
+ ): Promise<Array<NativeRerankResult>>
448
484
  bench(
449
485
  contextId: number,
450
486
  pp: number,
@@ -486,6 +522,14 @@ export interface Spec extends TurboModule {
486
522
  contextId: number,
487
523
  ): Promise<void>
488
524
 
525
+ // TTS methods
526
+ initVocoder(contextId: number, vocoderModelPath: string): Promise<boolean>
527
+ isVocoderEnabled(contextId: number): Promise<boolean>
528
+ getFormattedAudioCompletion(contextId: number, speakerJsonStr: string, textToSpeak: string): Promise<string>
529
+ getAudioCompletionGuideTokens(contextId: number, textToSpeak: string): Promise<Array<number>>
530
+ decodeAudioTokens(contextId: number, tokens: number[]): Promise<Array<number>>
531
+ releaseVocoder(contextId: number): Promise<void>
532
+
489
533
  releaseContext(contextId: number): Promise<void>
490
534
 
491
535
  releaseAllContexts(): Promise<void>