cui-llama.rn 1.7.4 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +79 -5
  4. package/android/src/main/java/com/rnllama/RNLlama.java +237 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
@@ -124,6 +124,11 @@ public class RNLlamaModule extends ReactContextBaseJavaModule {
  rnllama.embedding(id, text, params, promise);
  }

+ @ReactMethod
+ public void rerank(double id, final String query, final ReadableArray documents, final ReadableMap params, final Promise promise) {
+ rnllama.rerank(id, query, documents, params, promise);
+ }
+
  @ReactMethod
  public void bench(double id, final double pp, final double tg, final double pl, final double nr, final Promise promise) {
  rnllama.bench(id, pp, tg, pl, nr, promise);
@@ -145,6 +150,35 @@ public class RNLlamaModule extends ReactContextBaseJavaModule {
  }

  @ReactMethod
+ public void initVocoder(double id, final String vocoderModelPath, final Promise promise) {
+ rnllama.initVocoder(id, vocoderModelPath, promise);
+ }
+
+ @ReactMethod
+ public void isVocoderEnabled(double id, final Promise promise) {
+ rnllama.isVocoderEnabled(id, promise);
+ }
+
+ @ReactMethod
+ public void getFormattedAudioCompletion(double id, final String speakerJsonStr, final String textToSpeak, final Promise promise) {
+ rnllama.getFormattedAudioCompletion(id, speakerJsonStr, textToSpeak, promise);
+ }
+
+ @ReactMethod
+ public void getAudioCompletionGuideTokens(double id, final String textToSpeak, final Promise promise) {
+ rnllama.getAudioCompletionGuideTokens(id, textToSpeak, promise);
+ }
+
+ @ReactMethod
+ public void decodeAudioTokens(double id, final ReadableArray tokens, final Promise promise) {
+ rnllama.decodeAudioTokens(id, tokens, promise);
+ }
+
+ @ReactMethod
+ public void releaseVocoder(double id, final Promise promise) {
+ rnllama.releaseVocoder(id, promise);
+ }
+
  public void releaseContext(double id, Promise promise) {
  rnllama.releaseContext(id, promise);
  }
package/cpp/README.md CHANGED
@@ -1,4 +1,4 @@
  # Note

- - Only `rn-llama.h` and `rn-llama.cpp` are the specific files for this folder, others are sync from [llama.cpp](https://github.com/ggerganov/llama.cpp).
+ - Only `rn-tts.h`, `rn-llama.h` and `rn-llama.cpp` are the specific files for this folder, others are sync from [llama.cpp](https://github.com/ggerganov/llama.cpp).
  - We can update the native source by using the [bootstrap](../scripts/bootstrap.sh) script.
package/cpp/chat-parser.cpp ADDED
@@ -0,0 +1,385 @@
+ #include "chat-parser.h"
+ #include "common.h"
+ #include "log.h"
+ #include "regex-partial.h"
+
+ #include <optional>
+ #include <stdexcept>
+ #include <string>
+ #include <vector>
+
+ using json = nlohmann::ordered_json;
+
+ common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax)
+ : input_(input), is_partial_(is_partial), syntax_(syntax)
+ {
+ result_.role = "assistant";
+
+ while (true) {
+ std::string id = std::to_string(std::rand());
+ if (input.find(id) == std::string::npos) {
+ healing_marker_ = id;
+ break;
+ }
+ }
+ }
+
+ std::string common_chat_msg_parser::str(const common_string_range & rng) const {
+ LM_GGML_ASSERT(rng.begin <= rng.end);
+ return input_.substr(rng.begin, rng.end - rng.begin);
+ }
+
+ void common_chat_msg_parser::add_content(const std::string &content) {
+ result_.content += content;
+ }
+
+ void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) {
+ result_.reasoning_content += reasoning_content;
+ }
+
+ bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) {
+ if (name.empty()) {
+ return false;
+ }
+
+ common_chat_tool_call tool_call;
+ tool_call.name = name;
+ tool_call.arguments = arguments;
+ tool_call.id = id;
+
+ // LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
+ result_.tool_calls.emplace_back(tool_call);
+
+ return true;
+ }
+ bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
+ std::string name = tool_call.contains("name") ? tool_call.at("name") : "";
+ std::string id = tool_call.contains("id") ? tool_call.at("id") : "";
+ std::string arguments = tool_call.contains("arguments") ? tool_call.at("arguments") : "";
+ return add_tool_call(name, id, arguments);
+ }
+
+ bool common_chat_msg_parser::add_tool_calls(const json & arr) {
+ for (const auto & item : arr) {
+ if (!add_tool_call(item)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ void common_chat_msg_parser::finish() {
+ if (!is_partial_ && pos_ != input_.size()) {
+ throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));
+ }
+ }
+
+ bool common_chat_msg_parser::consume_spaces() {
+ const auto length = input_.size();
+ auto consumed = false;
+ while (pos_ < length && std::isspace(input_[pos_])) {
+ ++pos_;
+ consumed = true;
+ }
+ return consumed;
+ }
+
+ bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
+ auto pos = pos_;
+ for (auto i = 0u; i < literal.size(); ++i) {
+ if (pos >= input_.size()) {
+ return false;
+ }
+ if (input_[pos] != literal[i]) {
+ return false;
+ }
+ ++pos;
+ }
+ pos_ = pos;
+ return true;
+ }
+
+ std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_literal(const std::string & literal) {
+ auto idx = input_.find(literal, pos_);
+ if (idx != std::string::npos) {
+ find_regex_result res;
+ res.prelude = input_.substr(pos_, idx - pos_);
+ auto end = idx + literal.size();
+ res.groups.emplace_back(common_string_range{idx, end});
+ move_to(end);
+ return res;
+ }
+ if (is_partial_) {
+ idx = string_find_partial_stop(input_, literal);
+ if (idx != std::string::npos && idx >= pos_) {
+ find_regex_result res;
+ res.prelude = input_.substr(pos_, idx - pos_);
+ auto end = input_.size();
+ res.groups.emplace_back(common_string_range{idx, end});
+ move_to(end);
+ return res;
+ }
+ }
+ return std::nullopt;
+ }
+
+ void common_chat_msg_parser::consume_literal(const std::string & literal) {
+ if (!try_consume_literal(literal)) {
+ throw common_chat_msg_partial_exception(literal);
+ }
+ }
+
+ bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
+ auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
+ auto stripped_reasoning = string_strip(reasoning);
+ if (stripped_reasoning.empty()) {
+ return;
+ }
+ if (syntax_.reasoning_in_content) {
+ add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
+ add_content(stripped_reasoning);
+ if (closed) {
+ add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
+ }
+ } else {
+ add_reasoning_content(stripped_reasoning);
+ }
+ };
+ if (syntax_.reasoning_format != COMMON_REASONING_FORMAT_NONE) {
+ if (syntax_.thinking_forced_open || try_consume_literal(start_think)) {
+ if (auto res = try_find_literal(end_think)) {
+ handle_reasoning(res->prelude, /* closed */ true);
+ consume_spaces();
+ return true;
+ }
+ auto rest = consume_rest();
+ if (!rest.empty()) {
+ handle_reasoning(rest, /* closed */ !is_partial());
+ }
+ // Allow unclosed thinking tags, for now (https://github.com/ggml-org/llama.cpp/issues/13812, https://github.com/ggml-org/llama.cpp/issues/13877)
+ // if (!syntax_.thinking_forced_open) {
+ // throw common_chat_msg_partial_exception(end_think);
+ // }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ std::string common_chat_msg_parser::consume_rest() {
+ auto rest = input_.substr(pos_);
+ pos_ = input_.size();
+ return rest;
+ }
+
+ // Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback.
+ std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) {
+ auto m = regex.search(input_, from == std::string::npos ? pos_ : from);
+ if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
+ return std::nullopt;
+ }
+ auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
+ pos_ = m.groups[0].end;
+
+ if (add_prelude_to_content) {
+ add_content(prelude);
+ }
+ if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
+ if (is_partial()) {
+ throw common_chat_msg_partial_exception(regex.str());
+ }
+ return std::nullopt;
+ }
+ return find_regex_result{prelude, m.groups};
+ }
+
+ common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) {
+ if (auto result = try_consume_regex(regex)) {
+ return *result;
+ }
+ throw common_chat_msg_partial_exception(regex.str());
+ }
+
+ std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_consume_regex(const common_regex & regex) {
+ auto m = regex.search(input_, pos_);
+ if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
+ return std::nullopt;
+ }
+ if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
+ if (is_partial()) {
+ throw common_chat_msg_partial_exception(regex.str());
+ }
+ return std::nullopt;
+ }
+ if (m.groups[0].begin != pos_) {
+ // Didn't match at the current position.
+ return std::nullopt;
+ }
+ pos_ = m.groups[0].end;
+
+ return find_regex_result {
+ /* .prelude = */ "",
+ m.groups,
+ };
+ }
+
+ std::optional<common_json> common_chat_msg_parser::try_consume_json() {
+ auto it = input_.cbegin() + pos_;
+ const auto end = input_.cend();
+ common_json result;
+ if (!common_json_parse(it, end, healing_marker_, result)) {
+ return std::nullopt;
+ }
+ pos_ = std::distance(input_.cbegin(), it);
+ if (result.healing_marker.marker.empty()) {
+ // No healing marker, just return the parsed json
+ return result;
+ }
+ if (!is_partial()) {
+ throw common_chat_msg_partial_exception("JSON");
+ }
+ return result;
+ }
+
+ common_json common_chat_msg_parser::consume_json() {
+ if (auto result = try_consume_json()) {
+ return *result;
+ }
+ throw common_chat_msg_partial_exception("JSON");
+ }
+
+ common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args(
+ const std::vector<std::vector<std::string>> & args_paths,
+ const std::vector<std::vector<std::string>> & content_paths
+ ) {
+ if (auto result = try_consume_json_with_dumped_args(args_paths, content_paths)) {
+ return *result;
+ }
+ throw common_chat_msg_partial_exception("JSON");
+ }
+
+ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parser::try_consume_json_with_dumped_args(
+ const std::vector<std::vector<std::string>> & args_paths,
+ const std::vector<std::vector<std::string>> & content_paths
+ ) {
+ auto partial = try_consume_json();
+ if (!partial) {
+ return std::nullopt;
+ }
+ auto is_arguments_path = [&](const std::vector<std::string> & path) {
+ return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end();
+ };
+ auto is_content_path = [&](const std::vector<std::string> & path) {
+ return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end();
+ };
+
+ if (partial->healing_marker.marker.empty()) {
+ if (args_paths.empty()) {
+ // No arguments to dump, and JSON was parsed fully.
+ return consume_json_result {
+ partial->json,
+ /* .is_partial = */ false,
+ };
+ }
+ if (is_arguments_path({})) {
+ // Entire JSON is the arguments and was parsed fully.
+ return consume_json_result {
+ partial->json.dump(),
+ /* .is_partial = */ false,
+ };
+ }
+ }
+
+ LOG_DBG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
+
+ auto found_healing_marker = false;
+ std::vector<std::string> path;
+ std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
+ if (is_arguments_path(path)) {
+ auto arguments = j.dump();
+ if (is_partial() && !partial->healing_marker.marker.empty()) {
+ auto idx = arguments.find(partial->healing_marker.json_dump_marker);
+ if (idx != std::string::npos) {
+ arguments.resize(idx);
+ found_healing_marker = true;
+ }
+ if (arguments == "\"") {
+ // This happens because of completing `:"$magic` after `"arguments"`
+ arguments = "";
+ }
+ }
+ return arguments;
+ }
+ if (is_content_path(path)) {
+ if (!j.is_string()) {
+ throw std::runtime_error("Content path must be a string");
+ }
+ std::string str = j;
+ auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string
+ if (idx != std::string::npos) {
+ str.resize(idx);
+ found_healing_marker = true;
+ }
+ return str;
+ }
+ if (j.is_object()) {
+ auto obj = json::object();
+ for (const auto & p : j.items()) {
+ const auto & key = p.key();
+ const auto & value = p.value();
+ const std::string key_str = key; // NOLINT
+ auto idx = key_str.find(healing_marker_);
+ if (idx != std::string::npos) {
+ found_healing_marker = true;
+ break;
+ }
+ path.push_back(key_str);
+ if (value.is_string()) {
+ const std::string value_str = value;
+ if (value_str.find(healing_marker_) != std::string::npos) {
+ found_healing_marker = true;
+ if (is_content_path(path)) {
+ if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) {
+ // The healing occurred inside the string: good. Otherwise we just ditch the entire key/value pair.
+ obj[key] = remove_unsupported_healings_and_dump_args(value);
+ }
+ }
+ break;
+ }
+ obj[key] = value;
+ } else {
+ obj[key] = remove_unsupported_healings_and_dump_args(value);
+ }
+ path.pop_back();
+ }
+ return obj;
+ }
+ if (j.is_array()) {
+ auto arr = json::array();
+ for (const auto & value : j) {
+ if (value.is_string()) {
+ std::string str = value;
+ auto idx = str.find(healing_marker_);
+ if (idx != std::string::npos) {
+ // Don't heal array values that aren't in the arguments.
+ found_healing_marker = true;
+ break;
+ }
+ }
+ arr.push_back(remove_unsupported_healings_and_dump_args(value));
+ }
+ return arr;
+ }
+ return j;
+ };
+
+ auto cleaned = remove_unsupported_healings_and_dump_args(partial->json);
+ LOG_DBG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
+ return consume_json_result {
+ cleaned,
+ /* .is_partial = */ found_healing_marker,
+ };
+ }
+
+ void common_chat_msg_parser::clear_tools() {
+ result_.tool_calls.clear();
+ }
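
Aside (not part of the published diff): the new parser above is driven by the per-format handlers in chat.cpp. A minimal, hypothetical sketch of how such a handler could use it, assuming the caller has set a non-NONE reasoning_format on common_chat_syntax; the function name here is illustrative only:

// Hypothetical usage sketch, not shipped in the package.
static void parse_reasoning_then_content(common_chat_msg_parser & builder) {
    // Split "<think>...</think>" into result().reasoning_content (or keep it in
    // content when syntax().reasoning_in_content is set), then treat the rest
    // of the input as plain assistant content.
    builder.try_parse_reasoning("<think>", "</think>");
    builder.add_content(builder.consume_rest());
    builder.finish(); // safe: consume_rest() leaves pos() at the end of the input
}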
package/cpp/chat-parser.h ADDED
@@ -0,0 +1,120 @@
+ #pragma once
+
+ #include "chat.h"
+ #include "json-partial.h"
+ #include "regex-partial.h"
+
+ #include "nlohmann/json.hpp"
+
+ #include <optional>
+ #include <string>
+ #include <vector>
+
+ class common_chat_msg_partial_exception : public std::runtime_error {
+ public:
+ common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
+ };
+
+ class common_chat_msg_parser {
+ std::string input_;
+ bool is_partial_;
+ common_chat_syntax syntax_;
+ std::string healing_marker_;
+
+ size_t pos_ = 0;
+ common_chat_msg result_;
+
+ public:
+ common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
+ const std::string & input() const { return input_; }
+ size_t pos() const { return pos_; }
+ const std::string & healing_marker() const { return healing_marker_; }
+ const bool & is_partial() const { return is_partial_; }
+ const common_chat_msg & result() const { return result_; }
+ const common_chat_syntax & syntax() const { return syntax_; }
+
+ void move_to(size_t pos) {
+ if (pos > input_.size()) {
+ throw std::runtime_error("Invalid position!");
+ }
+ pos_ = pos;
+ }
+ void move_back(size_t n) {
+ if (pos_ < n) {
+ throw std::runtime_error("Can't move back that far!");
+ }
+ pos_ -= n;
+ }
+
+ // Get the substring of the input at the given range
+ std::string str(const common_string_range & rng) const;
+
+ // Appends to the result.content field
+ void add_content(const std::string & content);
+
+ // Appends to the result.reasoning_content field
+ void add_reasoning_content(const std::string & reasoning_content);
+
+ // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
+ bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
+
+ // Adds a tool call using the "name", "id" and "arguments" fields of the json object
+ bool add_tool_call(const nlohmann::ordered_json & tool_call);
+
+ // Adds an array of tool calls using their "name", "id" and "arguments" fields.
+ bool add_tool_calls(const nlohmann::ordered_json & arr);
+
+ void finish();
+
+ bool consume_spaces();
+
+ void consume_literal(const std::string & literal);
+
+ bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
+
+ std::string consume_rest();
+
+ struct find_regex_result {
+ std::string prelude;
+ std::vector<common_string_range> groups;
+ };
+
+ std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
+
+ bool try_consume_literal(const std::string & literal);
+
+ std::optional<find_regex_result> try_find_literal(const std::string & literal);
+
+ find_regex_result consume_regex(const common_regex & regex);
+
+ std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
+
+ std::optional<common_json> try_consume_json();
+ common_json consume_json();
+
+ struct consume_json_result {
+ nlohmann::ordered_json value;
+ bool is_partial;
+ };
+
+ /*
+ Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.
+
+ By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
+ e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
+
+ But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
+ - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
+ - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
+ */
+ consume_json_result consume_json_with_dumped_args(
+ const std::vector<std::vector<std::string>> & args_paths = {},
+ const std::vector<std::vector<std::string>> & content_paths = {}
+ );
+ std::optional<consume_json_result> try_consume_json_with_dumped_args(
+ const std::vector<std::vector<std::string>> & args_paths = {},
+ const std::vector<std::vector<std::string>> & content_paths = {}
+ );
+
+ void clear_tools();
+ };
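
Aside (not part of the published diff): the comment on consume_json_with_dumped_args above is easiest to follow with a concrete call. A hedged sketch, assuming common_chat_syntax can be default-constructed for a bare parse; the input string and expected values are illustrative and follow the examples in that comment:

// Hypothetical sketch of the documented truncation behaviour.
#include "chat-parser.h"

static void demo_dumped_args() {
    common_chat_syntax syntax; // assumption: defaults suffice for a bare parse
    common_chat_msg_parser parser(
        "{\"name\": \"weather\", \"arguments\": {\"city\": \"Par",
        /* is_partial */ true, syntax);
    auto res = parser.consume_json_with_dumped_args(/* args_paths */ {{"arguments"}});
    // The "arguments" subtree is dumped to a truncated JSON string, so res.value
    // is roughly {"name":"weather","arguments":"{\"city\":\"Par"} and
    // res.is_partial is true because the healing marker was hit inside it.
}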