cui-llama.rn 1.7.3 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
  4. package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
@@ -0,0 +1,187 @@
1
+ // __ _____ _____ _____
2
+ // __| | __| | | | JSON for Modern C++
3
+ // | | |__ | | | | | | version 3.12.0
4
+ // |_____|_____|_____|_|___| https://github.com/nlohmann/json
5
+ //
6
+ // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann <https://nlohmann.me>
7
+ // SPDX-License-Identifier: MIT
8
+
9
+ #ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_
10
+ #define INCLUDE_NLOHMANN_JSON_FWD_HPP_
11
+
12
+ #include <cstdint> // int64_t, uint64_t
13
+ #include <map> // map
14
+ #include <memory> // allocator
15
+ #include <string> // string
16
+ #include <vector> // vector
17
+
18
+ // #include <nlohmann/detail/abi_macros.hpp>
19
+ // __ _____ _____ _____
20
+ // __| | __| | | | JSON for Modern C++
21
+ // | | |__ | | | | | | version 3.12.0
22
+ // |_____|_____|_____|_|___| https://github.com/nlohmann/json
23
+ //
24
+ // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann <https://nlohmann.me>
25
+ // SPDX-License-Identifier: MIT
26
+
27
+
28
+
29
+ // This file contains all macro definitions affecting or depending on the ABI
30
+
31
+ #ifndef JSON_SKIP_LIBRARY_VERSION_CHECK
32
+ #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH)
33
+ #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 12 || NLOHMANN_JSON_VERSION_PATCH != 0
34
+ #warning "Already included a different version of the library!"
35
+ #endif
36
+ #endif
37
+ #endif
38
+
39
+ #define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum)
40
+ #define NLOHMANN_JSON_VERSION_MINOR 12 // NOLINT(modernize-macro-to-enum)
41
+ #define NLOHMANN_JSON_VERSION_PATCH 0 // NOLINT(modernize-macro-to-enum)
42
+
43
+ #ifndef JSON_DIAGNOSTICS
44
+ #define JSON_DIAGNOSTICS 0
45
+ #endif
46
+
47
+ #ifndef JSON_DIAGNOSTIC_POSITIONS
48
+ #define JSON_DIAGNOSTIC_POSITIONS 0
49
+ #endif
50
+
51
+ #ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
52
+ #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0
53
+ #endif
54
+
55
+ #if JSON_DIAGNOSTICS
56
+ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag
57
+ #else
58
+ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS
59
+ #endif
60
+
61
+ #if JSON_DIAGNOSTIC_POSITIONS
62
+ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp
63
+ #else
64
+ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS
65
+ #endif
66
+
67
+ #if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
68
+ #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp
69
+ #else
70
+ #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON
71
+ #endif
72
+
73
+ #ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION
74
+ #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0
75
+ #endif
76
+
77
+ // Construct the namespace ABI tags component
78
+ #define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c
79
+ #define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \
80
+ NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c)
81
+
82
+ #define NLOHMANN_JSON_ABI_TAGS \
83
+ NLOHMANN_JSON_ABI_TAGS_CONCAT( \
84
+ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \
85
+ NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \
86
+ NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS)
87
+
88
+ // Construct the namespace version component
89
+ #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \
90
+ _v ## major ## _ ## minor ## _ ## patch
91
+ #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \
92
+ NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch)
93
+
94
+ #if NLOHMANN_JSON_NAMESPACE_NO_VERSION
95
+ #define NLOHMANN_JSON_NAMESPACE_VERSION
96
+ #else
97
+ #define NLOHMANN_JSON_NAMESPACE_VERSION \
98
+ NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \
99
+ NLOHMANN_JSON_VERSION_MINOR, \
100
+ NLOHMANN_JSON_VERSION_PATCH)
101
+ #endif
102
+
103
+ // Combine namespace components
104
+ #define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b
105
+ #define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \
106
+ NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b)
107
+
108
+ #ifndef NLOHMANN_JSON_NAMESPACE
109
+ #define NLOHMANN_JSON_NAMESPACE \
110
+ nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \
111
+ NLOHMANN_JSON_ABI_TAGS, \
112
+ NLOHMANN_JSON_NAMESPACE_VERSION)
113
+ #endif
114
+
115
+ #ifndef NLOHMANN_JSON_NAMESPACE_BEGIN
116
+ #define NLOHMANN_JSON_NAMESPACE_BEGIN \
117
+ namespace nlohmann \
118
+ { \
119
+ inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \
120
+ NLOHMANN_JSON_ABI_TAGS, \
121
+ NLOHMANN_JSON_NAMESPACE_VERSION) \
122
+ {
123
+ #endif
124
+
125
+ #ifndef NLOHMANN_JSON_NAMESPACE_END
126
+ #define NLOHMANN_JSON_NAMESPACE_END \
127
+ } /* namespace (inline namespace) NOLINT(readability/namespace) */ \
128
+ } // namespace nlohmann
129
+ #endif
130
+
131
+
132
+ /*!
133
+ @brief namespace for Niels Lohmann
134
+ @see https://github.com/nlohmann
135
+ @since version 1.0.0
136
+ */
137
+ NLOHMANN_JSON_NAMESPACE_BEGIN
138
+
139
+ /*!
140
+ @brief default JSONSerializer template argument
141
+
142
+ This serializer ignores the template arguments and uses ADL
143
+ ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl))
144
+ for serialization.
145
+ */
146
+ template<typename T = void, typename SFINAE = void>
147
+ struct adl_serializer;
148
+
149
+ /// a class to store JSON values
150
+ /// @sa https://json.nlohmann.me/api/basic_json/
151
+ template<template<typename U, typename V, typename... Args> class ObjectType =
152
+ std::map,
153
+ template<typename U, typename... Args> class ArrayType = std::vector,
154
+ class StringType = std::string, class BooleanType = bool,
155
+ class NumberIntegerType = std::int64_t,
156
+ class NumberUnsignedType = std::uint64_t,
157
+ class NumberFloatType = double,
158
+ template<typename U> class AllocatorType = std::allocator,
159
+ template<typename T, typename SFINAE = void> class JSONSerializer =
160
+ adl_serializer,
161
+ class BinaryType = std::vector<std::uint8_t>, // cppcheck-suppress syntaxError
162
+ class CustomBaseClass = void>
163
+ class basic_json;
164
+
165
+ /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document
166
+ /// @sa https://json.nlohmann.me/api/json_pointer/
167
+ template<typename RefStringType>
168
+ class json_pointer;
169
+
170
+ /*!
171
+ @brief default specialization
172
+ @sa https://json.nlohmann.me/api/json/
173
+ */
174
+ using json = basic_json<>;
175
+
176
+ /// @brief a minimal map-like container that preserves insertion order
177
+ /// @sa https://json.nlohmann.me/api/ordered_map/
178
+ template<class Key, class T, class IgnoredLess, class Allocator>
179
+ struct ordered_map;
180
+
181
+ /// @brief specialization that maintains the insertion order of object keys
182
+ /// @sa https://json.nlohmann.me/api/ordered_json/
183
+ using ordered_json = basic_json<nlohmann::ordered_map>;
184
+
185
+ NLOHMANN_JSON_NAMESPACE_END
186
+
187
+ #endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_
@@ -0,0 +1,56 @@
1
+ #pragma once
2
+
3
+ #include <regex>
4
+ #include <string>
5
+
6
// Outcome category stored in common_regex_match::type.
// NONE is the value a default-constructed common_regex_match carries.
// NOTE(review): PARTIAL presumably means the input ended part-way through a
// potential match and FULL a complete match — confirm against regex-partial.cpp.
enum common_regex_match_type {
    COMMON_REGEX_MATCH_TYPE_NONE    = 0,
    COMMON_REGEX_MATCH_TYPE_PARTIAL = 1,
    COMMON_REGEX_MATCH_TYPE_FULL    = 2,
};
11
+
12
// A pair of offsets with begin <= end checked at construction time.
// The fields are public and mutable, so the check is NOT re-applied if a
// caller later assigns to begin/end directly.
// NOTE(review): std::runtime_error lives in <stdexcept>; this header only
// includes <regex> and <string> directly, so it relies on a transitive include.
struct common_string_range {
    size_t begin;
    size_t end;

    // Builds the range; throws std::runtime_error("Invalid range") when
    // begin > end.
    common_string_range(size_t begin, size_t end) : begin(begin), end(end) {
        if (begin > end) {
            throw std::runtime_error("Invalid range");
        }
    }

    // prevent default ctor
    common_string_range() = delete;

    // True when both offsets coincide.
    bool empty() const {
        return begin == end;
    }

    // Two ranges are equal when both offsets match.
    bool operator==(const common_string_range & other) const {
        return begin == other.begin && end == other.end;
    }

    // Counterpart of operator==, matching common_regex_match which offers
    // both; without it `a != b` does not compile before C++20.
    bool operator!=(const common_string_range & other) const {
        return !(*this == other);
    }
};
29
+
30
+ struct common_regex_match {
31
+ common_regex_match_type type = COMMON_REGEX_MATCH_TYPE_NONE;
32
+ std::vector<common_string_range> groups;
33
+
34
+ bool operator==(const common_regex_match & other) const {
35
+ return type == other.type && groups == other.groups;
36
+ }
37
+ bool operator!=(const common_regex_match & other) const {
38
+ return !(*this == other);
39
+ }
40
+ };
41
+
42
+ class common_regex {
43
+ std::string pattern;
44
+ std::regex rx;
45
+ std::regex rx_reversed_partial;
46
+
47
+ public:
48
+ explicit common_regex(const std::string & pattern);
49
+
50
+ common_regex_match search(const std::string & input, size_t pos, bool as_match = false) const;
51
+
52
+ const std::string & str() const { return pattern; }
53
+ };
54
+
55
+ // For testing only (pretty print of failures).
56
+ std::string regex_to_reversed_partial_regex(const std::string & pattern);
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include <sstream>
5
5
  #include <iostream>
6
+ #include <thread>
6
7
  #include "chat.h"
7
8
  #include "common.h"
8
9
  #include "ggml.h"
@@ -10,10 +11,13 @@
10
11
  #include "llama.h"
11
12
  #include "llama-impl.h"
12
13
  #include "sampling.h"
14
+ #include "nlohmann/json.hpp"
13
15
  #if defined(__ANDROID__)
14
16
  #include <android/log.h>
15
17
  #endif
16
18
 
19
+ using json = nlohmann::ordered_json;
20
+
17
21
  namespace rnllama {
18
22
 
19
23
  std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token);
@@ -43,6 +47,8 @@ struct completion_token_output
43
47
 
44
48
  struct llama_rn_context_mtmd;
45
49
 
50
+ struct llama_rn_context_vocoder;
51
+
46
52
  struct llama_rn_tokenize_result {
47
53
  std::vector<llama_token> tokens;
48
54
  bool has_media = false;
@@ -51,6 +57,12 @@ struct llama_rn_tokenize_result {
51
57
  std::vector<size_t> chunk_pos_media; // media only
52
58
  };
53
59
 
60
// TTS flavour identifiers.
// Values are explicit: -1 marks the unknown case, and 0 is not assigned to
// any enumerator.
// NOTE(review): OUTETTS_V0_2 / OUTETTS_V0_3 presumably correspond to OuteTTS
// model versions 0.2 and 0.3 — confirm against the code that sets them.
enum tts_type {
    UNKNOWN      = -1,
    OUTETTS_V0_2 = 1,
    OUTETTS_V0_3 = 2,
};
65
+
54
66
  // Main context class
55
67
  struct llama_rn_context {
56
68
  bool is_predicting = false;
@@ -58,6 +70,7 @@ struct llama_rn_context {
58
70
  bool has_next_token = false;
59
71
  std::string generated_text;
60
72
  std::vector<completion_token_output> generated_token_probs;
73
+ std::vector<llama_token> audio_tokens;
61
74
 
62
75
  size_t num_prompt_tokens = 0;
63
76
  size_t num_tokens_predicted = 0;
@@ -69,6 +82,9 @@ struct llama_rn_context {
69
82
  common_params params;
70
83
  common_init_result llama_init;
71
84
 
85
+ bool next_token_uses_guide_token = true;
86
+ std::vector<llama_token> guide_tokens;
87
+
72
88
  llama_model *model = nullptr;
73
89
  float loading_progress = 0;
74
90
  bool is_load_interrupted = false;
@@ -92,6 +108,9 @@ struct llama_rn_context {
92
108
  llama_rn_context_mtmd *mtmd_wrapper = nullptr;
93
109
  bool has_multimodal = false;
94
110
 
111
+ llama_rn_context_vocoder *vocoder_wrapper = nullptr;
112
+ bool has_vocoder = false;
113
+
95
114
  ~llama_rn_context();
96
115
 
97
116
  void rewind();
@@ -104,7 +123,8 @@ struct llama_rn_context {
104
123
  const std::string &json_schema,
105
124
  const std::string &tools,
106
125
  const bool &parallel_tool_calls,
107
- const std::string &tool_choice
126
+ const std::string &tool_choice,
127
+ const bool &enable_thinking
108
128
  ) const;
109
129
  std::string getFormattedChat(
110
130
  const std::string &messages,
@@ -112,12 +132,14 @@ struct llama_rn_context {
112
132
  ) const;
113
133
  void truncatePrompt(std::vector<llama_token> &prompt_tokens);
114
134
  void loadPrompt(const std::vector<std::string> &media_paths);
135
+ void setGuideTokens(const std::vector<llama_token> &tokens);
115
136
  void beginCompletion();
116
137
  void endCompletion();
117
138
  completion_token_output nextToken();
118
139
  size_t findStoppingStrings(const std::string &text, const size_t last_token_size, const stop_type type);
119
140
  completion_token_output doCompletion();
120
141
  std::vector<float> getEmbedding(common_params &embd_params);
142
+ std::vector<float> rerank(const std::string &query, const std::vector<std::string> &documents);
121
143
  std::string bench(int pp, int tg, int pl, int nr);
122
144
  int applyLoraAdapters(std::vector<common_adapter_lora_info> lora);
123
145
  void removeLoraAdapters();
@@ -137,6 +159,15 @@ struct llama_rn_context {
137
159
  );
138
160
 
139
161
  llama_rn_tokenize_result tokenize(const std::string &text, const std::vector<std::string> &media_paths);
162
+
163
+ // Vocoder methods
164
+ bool initVocoder(const std::string &vocoder_model_path);
165
+ tts_type getTTSType(json speaker = nullptr);
166
+ std::string getFormattedAudioCompletion(const std::string &speaker_json_str, const std::string &text_to_speak);
167
+ std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
168
+ std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
169
+ bool isVocoderEnabled() const;
170
+ void releaseVocoder();
140
171
  };
141
172
 
142
173
  // Logging macros
@@ -0,0 +1,39 @@
1
+
2
+
3
+ namespace rnllama {
4
+
5
+ // Default speaker profile (the transcript in default_audio_text and the matching audio codes in default_audio_data), taken from: https://github.com/edwko/OuteTTS/blob/main/outetts/version/v1/default_speakers/en_male_1.json
6
+ static const std::string default_audio_text = "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>";
7
+ static const std::string default_audio_data = R"(<|audio_start|>
8
+ the<|t_0.08|><|code_start|><|257|><|740|><|636|><|913|><|788|><|1703|><|code_end|>
9
+ overall<|t_0.36|><|code_start|><|127|><|201|><|191|><|774|><|700|><|532|><|1056|><|557|><|798|><|298|><|1741|><|747|><|1662|><|1617|><|1702|><|1527|><|368|><|1588|><|1049|><|1008|><|1625|><|747|><|1576|><|728|><|1019|><|1696|><|1765|><|code_end|>
10
+ package<|t_0.56|><|code_start|><|935|><|584|><|1319|><|627|><|1016|><|1491|><|1344|><|1117|><|1526|><|1040|><|239|><|1435|><|951|><|498|><|723|><|1180|><|535|><|789|><|1649|><|1637|><|78|><|465|><|1668|><|901|><|595|><|1675|><|117|><|1009|><|1667|><|320|><|840|><|79|><|507|><|1762|><|1508|><|1228|><|1768|><|802|><|1450|><|1457|><|232|><|639|><|code_end|>
11
+ from<|t_0.19|><|code_start|><|604|><|782|><|1682|><|872|><|1532|><|1600|><|1036|><|1761|><|647|><|1554|><|1371|><|653|><|1595|><|950|><|code_end|>
12
+ just<|t_0.25|><|code_start|><|1782|><|1670|><|317|><|786|><|1748|><|631|><|599|><|1155|><|1364|><|1524|><|36|><|1591|><|889|><|1535|><|541|><|440|><|1532|><|50|><|870|><|code_end|>
13
+ two<|t_0.24|><|code_start|><|1681|><|1510|><|673|><|799|><|805|><|1342|><|330|><|519|><|62|><|640|><|1138|><|565|><|1552|><|1497|><|1552|><|572|><|1715|><|1732|><|code_end|>
14
+ people<|t_0.39|><|code_start|><|593|><|274|><|136|><|740|><|691|><|633|><|1484|><|1061|><|1138|><|1485|><|344|><|428|><|397|><|1562|><|645|><|917|><|1035|><|1449|><|1669|><|487|><|442|><|1484|><|1329|><|1832|><|1704|><|600|><|761|><|653|><|269|><|code_end|>
15
+ is<|t_0.16|><|code_start|><|566|><|583|><|1755|><|646|><|1337|><|709|><|802|><|1008|><|485|><|1583|><|652|><|10|><|code_end|>
16
+ pretty<|t_0.32|><|code_start|><|1818|><|1747|><|692|><|733|><|1010|><|534|><|406|><|1697|><|1053|><|1521|><|1355|><|1274|><|816|><|1398|><|211|><|1218|><|817|><|1472|><|1703|><|686|><|13|><|822|><|445|><|1068|><|code_end|>
17
+ remarkable<|t_0.68|><|code_start|><|230|><|1048|><|1705|><|355|><|706|><|1149|><|1535|><|1787|><|1356|><|1396|><|835|><|1583|><|486|><|1249|><|286|><|937|><|1076|><|1150|><|614|><|42|><|1058|><|705|><|681|><|798|><|934|><|490|><|514|><|1399|><|572|><|1446|><|1703|><|1346|><|1040|><|1426|><|1304|><|664|><|171|><|1530|><|625|><|64|><|1708|><|1830|><|1030|><|443|><|1509|><|1063|><|1605|><|1785|><|721|><|1440|><|923|><|code_end|>
18
+ sure<|t_0.36|><|code_start|><|792|><|1780|><|923|><|1640|><|265|><|261|><|1525|><|567|><|1491|><|1250|><|1730|><|362|><|919|><|1766|><|543|><|1|><|333|><|113|><|970|><|252|><|1606|><|133|><|302|><|1810|><|1046|><|1190|><|1675|><|code_end|>
19
+ i<|t_0.08|><|code_start|><|123|><|439|><|1074|><|705|><|1799|><|637|><|code_end|>
20
+ have<|t_0.16|><|code_start|><|1509|><|599|><|518|><|1170|><|552|><|1029|><|1267|><|864|><|419|><|143|><|1061|><|0|><|code_end|>
21
+ some<|t_0.16|><|code_start|><|619|><|400|><|1270|><|62|><|1370|><|1832|><|917|><|1661|><|167|><|269|><|1366|><|1508|><|code_end|>
22
+ critiques<|t_0.60|><|code_start|><|559|><|584|><|1163|><|1129|><|1313|><|1728|><|721|><|1146|><|1093|><|577|><|928|><|27|><|630|><|1080|><|1346|><|1337|><|320|><|1382|><|1175|><|1682|><|1556|><|990|><|1683|><|860|><|1721|><|110|><|786|><|376|><|1085|><|756|><|1523|><|234|><|1334|><|1506|><|1578|><|659|><|612|><|1108|><|1466|><|1647|><|308|><|1470|><|746|><|556|><|1061|><|code_end|>
23
+ about<|t_0.29|><|code_start|><|26|><|1649|><|545|><|1367|><|1263|><|1728|><|450|><|859|><|1434|><|497|><|1220|><|1285|><|179|><|755|><|1154|><|779|><|179|><|1229|><|1213|><|922|><|1774|><|1408|><|code_end|>
24
+ some<|t_0.23|><|code_start|><|986|><|28|><|1649|><|778|><|858|><|1519|><|1|><|18|><|26|><|1042|><|1174|><|1309|><|1499|><|1712|><|1692|><|1516|><|1574|><|code_end|>
25
+ of<|t_0.07|><|code_start|><|197|><|716|><|1039|><|1662|><|64|><|code_end|>
26
+ the<|t_0.08|><|code_start|><|1811|><|1568|><|569|><|886|><|1025|><|1374|><|code_end|>
27
+ gameplay<|t_0.48|><|code_start|><|1269|><|1092|><|933|><|1362|><|1762|><|1700|><|1675|><|215|><|781|><|1086|><|461|><|838|><|1022|><|759|><|649|><|1416|><|1004|><|551|><|909|><|787|><|343|><|830|><|1391|><|1040|><|1622|><|1779|><|1360|><|1231|><|1187|><|1317|><|76|><|997|><|989|><|978|><|737|><|189|><|code_end|>
28
+ aspects<|t_0.56|><|code_start|><|1423|><|797|><|1316|><|1222|><|147|><|719|><|1347|><|386|><|1390|><|1558|><|154|><|440|><|634|><|592|><|1097|><|1718|><|712|><|763|><|1118|><|1721|><|1311|><|868|><|580|><|362|><|1435|><|868|><|247|><|221|><|886|><|1145|><|1274|><|1284|><|457|><|1043|><|1459|><|1818|><|62|><|599|><|1035|><|62|><|1649|><|778|><|code_end|>
29
+ but<|t_0.20|><|code_start|><|780|><|1825|><|1681|><|1007|><|861|><|710|><|702|><|939|><|1669|><|1491|><|613|><|1739|><|823|><|1469|><|648|><|code_end|>
30
+ its<|t_0.09|><|code_start|><|92|><|688|><|1623|><|962|><|1670|><|527|><|599|><|code_end|>
31
+ still<|t_0.27|><|code_start|><|636|><|10|><|1217|><|344|><|713|><|957|><|823|><|154|><|1649|><|1286|><|508|><|214|><|1760|><|1250|><|456|><|1352|><|1368|><|921|><|615|><|5|><|code_end|>
32
+ really<|t_0.36|><|code_start|><|55|><|420|><|1008|><|1659|><|27|><|644|><|1266|><|617|><|761|><|1712|><|109|><|1465|><|1587|><|503|><|1541|><|619|><|197|><|1019|><|817|><|269|><|377|><|362|><|1381|><|507|><|1488|><|4|><|1695|><|code_end|>
33
+ enjoyable<|t_0.49|><|code_start|><|678|><|501|><|864|><|319|><|288|><|1472|><|1341|><|686|><|562|><|1463|><|619|><|1563|><|471|><|911|><|730|><|1811|><|1006|><|520|><|861|><|1274|><|125|><|1431|><|638|><|621|><|153|><|876|><|1770|><|437|><|987|><|1653|><|1109|><|898|><|1285|><|80|><|593|><|1709|><|843|><|code_end|>
34
+ and<|t_0.15|><|code_start|><|1285|><|987|><|303|><|1037|><|730|><|1164|><|502|><|120|><|1737|><|1655|><|1318|><|code_end|>
35
+ it<|t_0.09|><|code_start|><|848|><|1366|><|395|><|1601|><|1513|><|593|><|1302|><|code_end|>
36
+ looks<|t_0.27|><|code_start|><|1281|><|1266|><|1755|><|572|><|248|><|1751|><|1257|><|695|><|1380|><|457|><|659|><|585|><|1315|><|1105|><|1776|><|736|><|24|><|736|><|654|><|1027|><|code_end|>
37
+ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|1481|><|1721|><|1123|><|438|><|1246|><|1251|><|795|><|659|><|1381|><|1658|><|217|><|1772|><|562|><|952|><|107|><|1129|><|1112|><|467|><|550|><|1079|><|840|><|1615|><|1469|><|1380|><|168|><|917|><|836|><|1827|><|437|><|583|><|67|><|595|><|1087|><|1646|><|1493|><|1677|><|code_end|>)";
38
+
39
+ }
@@ -0,0 +1,120 @@
1
+ #pragma once
2
+
3
+ #include "chat.h"
4
+ #include "json-partial.h"
5
+ #include "regex-partial.h"
6
+
7
+ #include "nlohmann/json.hpp"
8
+
9
+ #include <optional>
10
+ #include <string>
11
+ #include <vector>
12
+
13
// Dedicated std::runtime_error subtype, so callers can catch it separately
// from other runtime errors.
// NOTE(review): presumably raised by the chat parser when the input ends
// mid-construct -- confirm against the parser implementation.
class common_chat_msg_partial_exception : public std::runtime_error {
  public:
    // `explicit` prevents a std::string from being silently converted into an
    // exception object; `throw common_chat_msg_partial_exception("...")` is
    // unaffected.
    explicit common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
};
17
+
18
+ class common_chat_msg_parser {
19
+ std::string input_;
20
+ bool is_partial_;
21
+ common_chat_syntax syntax_;
22
+ std::string healing_marker_;
23
+
24
+ size_t pos_ = 0;
25
+ common_chat_msg result_;
26
+
27
+ public:
28
+ common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
29
+ const std::string & input() const { return input_; }
30
+ size_t pos() const { return pos_; }
31
+ const std::string & healing_marker() const { return healing_marker_; }
32
+ const bool & is_partial() const { return is_partial_; }
33
+ const common_chat_msg & result() const { return result_; }
34
+ const common_chat_syntax & syntax() const { return syntax_; }
35
+
36
+ void move_to(size_t pos) {
37
+ if (pos > input_.size()) {
38
+ throw std::runtime_error("Invalid position!");
39
+ }
40
+ pos_ = pos;
41
+ }
42
+ void move_back(size_t n) {
43
+ if (pos_ < n) {
44
+ throw std::runtime_error("Can't move back that far!");
45
+ }
46
+ pos_ -= n;
47
+ }
48
+
49
+ // Get the substring of the input at the given range
50
+ std::string str(const common_string_range & rng) const;
51
+
52
+ // Appends to the result.content field
53
+ void add_content(const std::string & content);
54
+
55
+ // Appends to the result.reasoning_content field
56
+ void add_reasoning_content(const std::string & reasoning_content);
57
+
58
+ // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
59
+ bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
60
+
61
+ // Adds a tool call using the "name", "id" and "arguments" fields of the json object
62
+ bool add_tool_call(const nlohmann::ordered_json & tool_call);
63
+
64
+ // Adds an array of tool calls using their "name", "id" and "arguments" fields.
65
+ bool add_tool_calls(const nlohmann::ordered_json & arr);
66
+
67
+ void finish();
68
+
69
+ bool consume_spaces();
70
+
71
+ void consume_literal(const std::string & literal);
72
+
73
+ bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
74
+
75
+ std::string consume_rest();
76
+
77
+ struct find_regex_result {
78
+ std::string prelude;
79
+ std::vector<common_string_range> groups;
80
+ };
81
+
82
+ std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
83
+
84
+ bool try_consume_literal(const std::string & literal);
85
+
86
+ std::optional<find_regex_result> try_find_literal(const std::string & literal);
87
+
88
+ find_regex_result consume_regex(const common_regex & regex);
89
+
90
+ std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
91
+
92
+ std::optional<common_json> try_consume_json();
93
+ common_json consume_json();
94
+
95
+ struct consume_json_result {
96
+ nlohmann::ordered_json value;
97
+ bool is_partial;
98
+ };
99
+
100
+ /*
101
+ Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.
102
+
103
+ By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
104
+ e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
105
+
106
+ But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
107
+ - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
108
+ - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
109
+ */
110
+ consume_json_result consume_json_with_dumped_args(
111
+ const std::vector<std::vector<std::string>> & args_paths = {},
112
+ const std::vector<std::vector<std::string>> & content_paths = {}
113
+ );
114
+ std::optional<consume_json_result> try_consume_json_with_dumped_args(
115
+ const std::vector<std::vector<std::string>> & args_paths = {},
116
+ const std::vector<std::vector<std::string>> & content_paths = {}
117
+ );
118
+
119
+ void clear_tools();
120
+ };
@@ -3,6 +3,7 @@
3
3
  #pragma once
4
4
 
5
5
  #include "common.h"
6
+ #include <functional>
6
7
  #include <chrono>
7
8
  #include <string>
8
9
  #include <vector>
@@ -21,11 +22,19 @@ struct common_chat_tool_call {
21
22
  std::string name;
22
23
  std::string arguments;
23
24
  std::string id;
25
+
26
+ bool operator==(const common_chat_tool_call & other) const {
27
+ return name == other.name && arguments == other.arguments && id == other.id;
28
+ }
24
29
  };
25
30
 
26
31
// A (`type`, `text`) pair of strings with member-wise equality.
struct common_chat_msg_content_part {
    std::string type;
    std::string text;

    // Equal only when both `type` and `text` are identical.
    bool operator==(const common_chat_msg_content_part & other) const {
        if (!(type == other.type)) {
            return false;
        }
        return text == other.text;
    }
};
30
39
 
31
40
  struct common_chat_msg {
@@ -36,6 +45,51 @@ struct common_chat_msg {
36
45
  std::string reasoning_content;
37
46
  std::string tool_name;
38
47
  std::string tool_call_id;
48
+
49
+ template <class T> T to_json_oaicompat() const;
50
+
51
+ bool empty() const {
52
+ return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
53
+ }
54
+ void ensure_tool_call_ids_set(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
55
+ for (auto i = 0u; i < tool_calls.size(); i++) {
56
+ if (ids_cache.size() <= i) {
57
+ auto id = tool_calls[i].id;
58
+ if (id.empty()) {
59
+ id = gen_tool_call_id();
60
+ }
61
+ ids_cache.push_back(id);
62
+ }
63
+ tool_calls[i].id = ids_cache[i];
64
+ }
65
+ }
66
+ bool operator==(const common_chat_msg & other) const {
67
+ return role == other.role
68
+ && content == other.content
69
+ && content_parts == other.content_parts
70
+ && tool_calls == other.tool_calls
71
+ && reasoning_content == other.reasoning_content
72
+ && tool_name == other.tool_name
73
+ && tool_call_id == other.tool_call_id;
74
+ }
75
+ bool operator!=(const common_chat_msg & other) const {
76
+ return !(*this == other);
77
+ }
78
+ };
79
+
80
+ struct common_chat_msg_diff {
81
+ std::string reasoning_content_delta;
82
+ std::string content_delta;
83
+ size_t tool_call_index = std::string::npos;
84
+ common_chat_tool_call tool_call_delta;
85
+
86
+ static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
87
+
88
+ bool operator==(const common_chat_msg_diff & other) const {
89
+ return content_delta == other.content_delta
90
+ && tool_call_index == other.tool_call_index
91
+ && tool_call_delta == other.tool_call_delta;
92
+ }
39
93
  };
40
94
 
41
95
  struct common_chat_tool {
@@ -57,14 +111,11 @@ enum common_chat_format {
57
111
  COMMON_CHAT_FORMAT_LLAMA_3_X,
58
112
  COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
59
113
  COMMON_CHAT_FORMAT_DEEPSEEK_R1,
60
- COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
61
114
  COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
62
115
  COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
63
116
  COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
64
117
  COMMON_CHAT_FORMAT_HERMES_2_PRO,
65
- COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
66
118
  COMMON_CHAT_FORMAT_COMMAND_R7B,
67
- COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
68
119
 
69
120
  COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
70
121
  };
@@ -79,7 +130,8 @@ struct common_chat_templates_inputs {
79
130
  std::vector<common_chat_tool> tools;
80
131
  common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
81
132
  bool parallel_tool_calls = false;
82
- bool extract_reasoning = true;
133
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
134
+ bool enable_thinking = true;
83
135
  std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
84
136
  };
85
137
 
@@ -88,11 +140,21 @@ struct common_chat_params {
88
140
  std::string prompt;
89
141
  std::string grammar;
90
142
  bool grammar_lazy = false;
143
+ bool thinking_forced_open = false;
91
144
  std::vector<common_grammar_trigger> grammar_triggers;
92
145
  std::vector<std::string> preserved_tokens;
93
146
  std::vector<std::string> additional_stops;
94
147
  };
95
148
 
149
+ struct common_chat_syntax {
150
+ common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
151
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
152
+ // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
153
+ bool reasoning_in_content = false;
154
+ bool thinking_forced_open = false;
155
+ bool parse_tool_calls = true;
156
+ };
157
+
96
158
  // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
97
159
  bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
98
160
 
@@ -129,8 +191,9 @@ std::string common_chat_format_example(
129
191
  const struct common_chat_templates * tmpls,
130
192
  bool use_jinja);
131
193
 
132
- std::string common_chat_format_name(common_chat_format format);
133
- common_chat_msg common_chat_parse( const std::string & input, common_chat_format format);
194
+ const char* common_chat_format_name(common_chat_format format);
195
+ const char* common_reasoning_format_name(common_reasoning_format format);
196
+ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
134
197
 
135
198
  common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
136
199
 
@@ -143,3 +206,5 @@ template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common
143
206
  // T can be std::string containing JSON or nlohmann::ordered_json
144
207
  template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools);
145
208
  template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);
209
+
210
+ template <class T> T common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);