cui-llama.rn 1.7.3 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
  4. package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/cpp/ggml.c CHANGED
@@ -61,9 +61,6 @@
 #define m512i(p) (__m512i)(p)
 #endif
 
-// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
-float lm_ggml_table_f32_f16[1 << 16];
-
 #if defined(__linux__) || \
     defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
     (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
@@ -133,7 +130,7 @@ static void lm_ggml_print_backtrace_symbols(void) {
 }
 #endif
 
-static void lm_ggml_print_backtrace(void) {
+void lm_ggml_print_backtrace(void) {
     const char * LM_GGML_NO_BACKTRACE = getenv("LM_GGML_NO_BACKTRACE");
     if (LM_GGML_NO_BACKTRACE) {
         return;
@@ -160,6 +157,10 @@ static void lm_ggml_print_backtrace(void) {
     const int parent_pid = getpid();
     const int child_pid = fork();
     if (child_pid < 0) { // error
+#if defined(__linux__)
+        close(lock[1]);
+        close(lock[0]);
+#endif
         return;
     } else if (child_pid == 0) { // child
         char attach[32];
@@ -167,6 +168,7 @@ static void lm_ggml_print_backtrace(void) {
 #if defined(__linux__)
         close(lock[1]);
         (void) !read(lock[0], lock, 1);
+        close(lock[0]);
 #endif
         // try gdb
         execlp("gdb", "gdb", "--batch",
@@ -195,7 +197,7 @@ static void lm_ggml_print_backtrace(void) {
     }
 }
 #else
-static void lm_ggml_print_backtrace(void) {
+void lm_ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif
@@ -216,6 +218,8 @@ void lm_ggml_abort(const char * file, int line, const char * fmt, ...) {
     abort();
 }
 
+// lm_ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
+
 //
 // logging
 //
@@ -894,12 +898,6 @@ struct lm_ggml_context {
     struct lm_ggml_object * objects_end;
 };
 
-struct lm_ggml_context_container {
-    bool used;
-
-    struct lm_ggml_context context;
-};
-
 //
 // data types
 //
@@ -967,6 +965,7 @@ static const char * LM_GGML_OP_NAME[LM_GGML_OP_COUNT] = {
     "UPSCALE",
     "PAD",
     "PAD_REFLECT_1D",
+    "ROLL",
     "ARANGE",
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
@@ -997,7 +996,7 @@ static const char * LM_GGML_OP_NAME[LM_GGML_OP_COUNT] = {
     "OPT_STEP_ADAMW",
 };
 
-static_assert(LM_GGML_OP_COUNT == 82, "LM_GGML_OP_COUNT != 82");
+static_assert(LM_GGML_OP_COUNT == 83, "LM_GGML_OP_COUNT != 83");
 
 static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
     "none",
@@ -1062,6 +1061,7 @@ static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
     "upscale(x)",
     "pad(x)",
     "pad_reflect_1d(x)",
+    "roll(x)",
     "arange(start, stop, step)",
     "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
@@ -1092,7 +1092,7 @@ static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
     "adamw(x)",
 };
 
-static_assert(LM_GGML_OP_COUNT == 82, "LM_GGML_OP_COUNT != 82");
+static_assert(LM_GGML_OP_COUNT == 83, "LM_GGML_OP_COUNT != 83");
 
 static_assert(LM_GGML_OP_POOL_COUNT == 2, "LM_GGML_OP_POOL_COUNT != 2");
 
@@ -1432,14 +1432,6 @@ struct lm_ggml_context * lm_ggml_init(struct lm_ggml_init_params params) {
         // initialize time system (required on Windows)
         lm_ggml_time_init();
 
-        for (int i = 0; i < (1 << 16); ++i) {
-            union {
-                uint16_t u16;
-                lm_ggml_fp16_t fp16;
-            } u = {i};
-            lm_ggml_table_f32_f16[i] = LM_GGML_COMPUTE_FP16_TO_FP32(u.fp16);
-        }
-
         is_first_call = false;
     }
 
@@ -2325,6 +2317,26 @@ struct lm_ggml_tensor * lm_ggml_repeat(
     return result;
 }
 
+struct lm_ggml_tensor * lm_ggml_repeat_4d(
+        struct lm_ggml_context * ctx,
+        struct lm_ggml_tensor * a,
+        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
+    const bool can_repeat = lm_ggml_is_empty(a) || (
+        (ne0 % a->ne[0] == 0) &&
+        (ne1 % a->ne[1] == 0) &&
+        (ne2 % a->ne[2] == 0) &&
+        (ne3 % a->ne[3] == 0)
+    );
+    LM_GGML_ASSERT(can_repeat);
+
+    struct lm_ggml_tensor * result = lm_ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
+
+    result->op = LM_GGML_OP_REPEAT;
+    result->src[0] = a;
+
+    return result;
+}
+
 // lm_ggml_repeat_back
 
 struct lm_ggml_tensor * lm_ggml_repeat_back(
@@ -4333,6 +4345,34 @@ struct lm_ggml_tensor * lm_ggml_pad_reflect_1d(
     return result;
 }
 
+// lm_ggml_roll
+
+struct lm_ggml_tensor * lm_ggml_roll(
+        struct lm_ggml_context * ctx,
+        struct lm_ggml_tensor * a,
+        int shift0,
+        int shift1,
+        int shift2,
+        int shift3) {
+    LM_GGML_ASSERT(a->nb[0] == lm_ggml_type_size(a->type));
+    LM_GGML_ASSERT(abs(shift0) < a->ne[0]);
+    LM_GGML_ASSERT(abs(shift1) < a->ne[1]);
+    LM_GGML_ASSERT(abs(shift2) < a->ne[2]);
+    LM_GGML_ASSERT(abs(shift3) < a->ne[3]);
+
+    struct lm_ggml_tensor * result = lm_ggml_dup_tensor(ctx, a);
+
+    lm_ggml_set_op_params_i32(result, 0, shift0);
+    lm_ggml_set_op_params_i32(result, 1, shift1);
+    lm_ggml_set_op_params_i32(result, 2, shift2);
+    lm_ggml_set_op_params_i32(result, 3, shift3);
+
+    result->op = LM_GGML_OP_ROLL;
+    result->src[0] = a;
+
+    return result;
+}
+
 // lm_ggml_arange
 
 struct lm_ggml_tensor * lm_ggml_arange(
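Note: lm_ggml_roll above only records the per-dimension shifts as op params on the result node; the actual wrap-around copy is performed by the backend kernels updated elsewhere in this diff (e.g. ggml-cpu/ops.cpp). A minimal sketch of the index mapping the operator describes, assuming the usual modular wrap:

// Illustration only (not part of the package): the wrap-around mapping implied
// by lm_ggml_roll for a single dimension of size n shifted by s. The element at
// source index i lands at destination ((i + s) mod n + n) mod n, so anything
// pushed past the end re-enters at index 0. The helper name is hypothetical.
static inline int rolled_dst_index(int i, int s, int n) {
    return ((i + s) % n + n) % n;
}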
package/cpp/ggml.h CHANGED
@@ -490,6 +490,7 @@ extern "C" {
         LM_GGML_OP_UPSCALE, // nearest interpolate
         LM_GGML_OP_PAD,
         LM_GGML_OP_PAD_REFLECT_1D,
+        LM_GGML_OP_ROLL,
         LM_GGML_OP_ARANGE,
         LM_GGML_OP_TIMESTEP_EMBEDDING,
         LM_GGML_OP_ARGSORT,
@@ -936,6 +937,15 @@ extern "C" {
             struct lm_ggml_tensor * a,
             struct lm_ggml_tensor * b);
 
+    // repeat a to the specified shape
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_4d(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2,
+            int64_t ne3);
+
     // sums repetitions in a into shape of b
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_back(
             struct lm_ggml_context * ctx,
@@ -1793,6 +1803,17 @@ extern "C" {
             int p0,
             int p1);
 
+    // Move tensor elements by an offset given for each dimension. Elements that
+    // are shifted beyond the last position are wrapped around to the beginning.
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_roll(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a,
+            int shift0,
+            int shift1,
+            int shift2,
+            int shift3);
+
+
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
     // return: [N, dim]
@@ -2087,9 +2108,6 @@ extern "C" {
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_graph_get_grad (const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node);
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_graph_get_grad_acc(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node);
 
-    LM_GGML_API void lm_ggml_graph_export(const struct lm_ggml_cgraph * cgraph, const char * fname);
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_import(const char * fname, struct lm_ggml_context ** ctx_data, struct lm_ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     LM_GGML_API void lm_ggml_graph_print(const struct lm_ggml_cgraph * cgraph);
 
@@ -2173,6 +2191,7 @@ extern "C" {
 
     // scheduling priorities
     enum lm_ggml_sched_priority {
+        LM_GGML_SCHED_PRIO_LOW = -1,
         LM_GGML_SCHED_PRIO_NORMAL,
         LM_GGML_SCHED_PRIO_MEDIUM,
         LM_GGML_SCHED_PRIO_HIGH,
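A hedged usage sketch of the two API additions declared above (lm_ggml_repeat_4d and lm_ggml_roll). The lm_ggml_init_params fields, lm_ggml_new_tensor_2d, LM_GGML_TYPE_F32 and lm_ggml_free are assumed to follow the usual lm_-prefixed ggml naming used throughout this package; they are not shown in this diff.

// Sketch only: building the new ops on a small context.
#include "ggml.h"

static void roll_and_repeat_sketch(void) {
    struct lm_ggml_init_params params = {
        /*.mem_size   =*/ 16u * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct lm_ggml_context * ctx = lm_ggml_init(params);

    // 4x3 F32 tensor (assumed helper lm_ggml_new_tensor_2d)
    struct lm_ggml_tensor * a = lm_ggml_new_tensor_2d(ctx, LM_GGML_TYPE_F32, 4, 3);

    // shift dim 0 by one position, wrapping the last element back to the front
    struct lm_ggml_tensor * rolled = lm_ggml_roll(ctx, a, 1, 0, 0, 0);

    // tile a to 8x6x1x1; each target extent must be a multiple of a's extent
    struct lm_ggml_tensor * tiled = lm_ggml_repeat_4d(ctx, a, 8, 6, 1, 1);

    (void) rolled;
    (void) tiled;
    lm_ggml_free(ctx);
}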
package/cpp/gguf.cpp CHANGED
@@ -335,7 +335,11 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
 
     for (uint32_t i = 0; i < magic.size(); i++) {
         if (magic[i] != LM_GGUF_MAGIC[i]) {
-            LM_GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
+            char c0 = isprint(magic[0]) ? magic[0] : '?';
+            char c1 = isprint(magic[1]) ? magic[1] : '?';
+            char c2 = isprint(magic[2]) ? magic[2] : '?';
+            char c3 = isprint(magic[3]) ? magic[3] : '?';
+            LM_GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3);
             lm_gguf_free(ctx);
             return nullptr;
         }
@@ -347,11 +351,28 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     int64_t n_tensors = 0;
 
     if (ok && gr.read(ctx->version)) {
-        if (ctx->version == 1) {
+        if (ok && ctx->version == 0) {
+            LM_GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        /*
+         * bit layout is different when reading non-native endian models.
+         * assuming that the GGUF version is 3, the non-native endian model
+         * would read it as 0x30000000. we can use the AND operation against
+         * the last 4 hexadecimal digits to check if the model is the same
+         * endianness as the host system.
+         */
+        if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
+            LM_GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        if (ok && ctx->version == 1) {
             LM_GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
             ok = false;
         }
-        if (ctx->version > LM_GGUF_VERSION) {
+        if (ok && ctx->version > LM_GGUF_VERSION) {
             LM_GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
                 __func__, ctx->version, LM_GGUF_VERSION);
             ok = false;
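A standalone illustration (not from the package) of why the new (version & 0x0000FFFF) == 0 guard catches endianness mismatches: a small version number written in the opposite byte order comes back with all of its low 16 bits clear.

#include <cstdint>
#include <cstdio>

// byte-swap a 32-bit value, i.e. what a host with mismatched endianness effectively reads
static uint32_t byteswap32(uint32_t v) {
    return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) | (v << 24);
}

int main() {
    const uint32_t native  = 3;                   // GGUF version 3, matching endianness
    const uint32_t swapped = byteswap32(native);  // 0x03000000 on a mismatched host
    std::printf("native rejected: %d, swapped rejected: %d\n",
                (native  & 0x0000FFFFu) == 0,     // 0: accepted
                (swapped & 0x0000FFFFu) == 0);    // 1: rejected as endianness mismatch
    return 0;
}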
package/cpp/json-partial.cpp ADDED
@@ -0,0 +1,256 @@
+#include "json-partial.h"
+
+#include "log.h"
+
+#include "nlohmann/json.hpp"
+
+#include <string>
+
+using json = nlohmann::ordered_json;
+
+enum common_json_stack_element_type {
+    COMMON_JSON_STACK_ELEMENT_OBJECT,
+    COMMON_JSON_STACK_ELEMENT_KEY,
+    COMMON_JSON_STACK_ELEMENT_ARRAY,
+};
+
+struct common_json_stack_element {
+    common_json_stack_element_type type;
+    std::string key;
+};
+
+bool common_json_parse(
+    const std::string & input,
+    const std::string & healing_marker,
+    common_json & out)
+{
+    std::string::const_iterator it = input.begin();
+    const auto end = input.end();
+    return common_json_parse(it, end, healing_marker, out);
+}
+
+bool common_json_parse(
+    std::string::const_iterator & it,
+    const std::string::const_iterator & end,
+    const std::string & healing_marker,
+    common_json & out)
+{
+    // // https://json.nlohmann.me/features/parsing/sax_interface/
+    struct json_error_locator : public nlohmann::json_sax<json> {
+        std::size_t position;
+        bool found_error;
+        std::string last_token;
+        std::string exception_message;
+        std::vector<common_json_stack_element> stack;
+
+        json_error_locator() : position(0), found_error(false) {}
+
+        bool parse_error(std::size_t position, const std::string & last_token, const json::exception & ex) override { // NOLINT
+            this->position = position - 1;
+            this->found_error = true;
+            this->last_token = last_token;
+            this->exception_message = ex.what();
+            return false;
+        }
+        void close_value() {
+            if (!stack.empty() && (stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY)) {
+                stack.pop_back();
+            }
+        }
+        bool null() override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool boolean(bool) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool number_integer(number_integer_t) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool number_unsigned(number_unsigned_t) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool number_float(number_float_t, const string_t &) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool string(string_t &) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool binary(binary_t &) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool start_object(std::size_t) override { // NOLINT
+            stack.push_back({COMMON_JSON_STACK_ELEMENT_OBJECT, ""});
+            return true;
+        }
+        bool end_object() override {
+            LM_GGML_ASSERT(!stack.empty() && stack.back().type == COMMON_JSON_STACK_ELEMENT_OBJECT);
+            stack.pop_back();
+            close_value();
+            return true;
+        }
+        bool key(string_t & key) override { // NOLINT
+            stack.push_back({COMMON_JSON_STACK_ELEMENT_KEY, key});
+            return true;
+        }
+        bool start_array(std::size_t) override { // NOLINT
+            stack.push_back({COMMON_JSON_STACK_ELEMENT_ARRAY, ""});
+            return true;
+        }
+        bool end_array() override {
+            LM_GGML_ASSERT(!stack.empty() && stack.back().type == COMMON_JSON_STACK_ELEMENT_ARRAY);
+            stack.pop_back();
+            close_value();
+            return true;
+        }
+    };
+    json_error_locator err_loc;
+    auto start = it;
+    json::sax_parse(it, end, &err_loc);
+
+    if (err_loc.found_error) {
+        it = start;
+        auto temptative_end = it + err_loc.position;
+        // LOG_DBG("Error at position %zu (is_end = %s): %s\n", err_loc.position, temptative_end == end ? "true" : "false", err_loc.exception_message.c_str());
+
+        auto input = std::string(it, temptative_end);
+        try {
+            out.json = json::parse(input);
+            // out.json = json::parse(it, temptative_end);
+            it = temptative_end;
+            return true;
+        } catch (const std::exception & ex) {
+            // No, needs healing.
+            LOG_DBG("Failed to parse up to error: %s: <<<%s>>>\n", ex.what(), std::string(it, temptative_end).c_str());
+        }
+        auto can_parse = [](const std::string & str) {
+            try {
+                auto _ = json::parse(str); // NOLINT
+                return true;
+            } catch (const std::exception &) {
+                return false;
+            }
+        };
+        if (!healing_marker.empty() && !err_loc.stack.empty()) {
+            std::string str(it, temptative_end);
+            auto last_non_sp_pos = str.find_last_not_of(" \n\r\t");
+            if (last_non_sp_pos == std::string::npos) {
+                throw std::runtime_error("Cannot heal a truncated JSON that stopped in an unknown location");
+            }
+            auto last_non_sp_char = str[last_non_sp_pos];
+            // Used to detect stops on a number, which may not be complete.
+            auto was_maybe_number = [&]() {
+                if (!str.empty() && std::isspace(str.back())) {
+                    return false;
+                }
+                return std::isdigit(last_non_sp_char) ||
+                    last_non_sp_char == '.' ||
+                    last_non_sp_char == 'e' ||
+                    last_non_sp_char == 'E' ||
+                    last_non_sp_char == '-';
+            };
+
+            std::string closing;
+            for (size_t i = err_loc.stack.size(); i > 0; i--) {
+                auto & el = err_loc.stack[i - 1];
+                if (el.type == COMMON_JSON_STACK_ELEMENT_OBJECT) {
+                    closing += "}";
+                } else if (el.type == COMMON_JSON_STACK_ELEMENT_ARRAY) {
+                    closing += "]";
+                } else if (el.type != COMMON_JSON_STACK_ELEMENT_KEY) {
+                    throw std::runtime_error("Unexpected stack element type");
+                }
+            }
+
+            const auto & magic_seed = out.healing_marker.marker = healing_marker;//"$llama.cpp.json$";
+
+            if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY) {
+                // We're inside an object value
+                if (last_non_sp_char == ':' && can_parse(str + "1" + closing)) {
+                    // Was about to create an object value
+                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
+                } else if (can_parse(str + ": 1" + closing)) {
+                    str += (out.healing_marker.json_dump_marker = ":\"" + magic_seed) + "\"" + closing;
+                } else if (last_non_sp_char == '{' && can_parse(str + closing)) {
+                    // Was about to create an object
+                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\": 1" + closing;
+                } else if (can_parse(str + "\"" + closing)) {
+                    // Was inside an object value string
+                    str += (out.healing_marker.json_dump_marker = magic_seed) + "\"" + closing;
+                } else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
+                    // Was inside an object value string after an escape
+                    str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
+                } else {
+                    // find last :
+                    auto last_pos = str.find_last_of(':');
+                    if (last_pos == std::string::npos) {
+                        throw std::runtime_error("Cannot heal a truncated JSON that stopped in an unknown location");
+                    }
+                    // Cutting back to opening : for object value
+                    str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
+                }
+            } else if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_ARRAY) {
+                if ((last_non_sp_char == ',' || last_non_sp_char == '[') && can_parse(str + "1" + closing)) {
+                    // Was about to create an array value
+                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
+                } else if (can_parse(str + "\"" + closing)) {
+                    // Was inside an array value string
+                    str += (out.healing_marker.json_dump_marker = magic_seed) + "\"" + closing;
+                } else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
+                    // Was inside an array value string after an escape
+                    str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
+                } else if (!was_maybe_number() && can_parse(str + ", 1" + closing)) {
+                    // Had just finished a value
+                    str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\"" + closing;
+                } else {
+                    auto last_pos = str.find_last_of("[,");
+                    if (last_pos == std::string::npos) {
+                        throw std::runtime_error("Cannot heal a truncated JSON array stopped in an unknown location");
+                    }
+                    // Cutting back to last [ or , for array value
+                    str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
+                }
+            } else if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_OBJECT) {
+                if ((last_non_sp_char == '{' && can_parse(str + closing)) ||
+                        (last_non_sp_char == ',' && can_parse(str + "\"\": 1" + closing))) {
+                    // Was about to create an object key+value
+                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\": 1" + closing;
+                } else if (!was_maybe_number() && can_parse(str + ",\"\": 1" + closing)) {
+                    // Was about to create an object key+value
+                    str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\": 1" + closing;
+                } else if (can_parse(str + "\": 1" + closing)) {
+                    // Was inside an object key string
+                    str += (out.healing_marker.json_dump_marker = magic_seed) + "\": 1" + closing;
+                } else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\": 1" + closing)) {
+                    // Was inside an object key string after an escape
+                    str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\": 1" + closing;
+                } else {
+                    auto last_pos = str.find_last_of(':');
+                    if (last_pos == std::string::npos) {
+                        throw std::runtime_error("Cannot heal a truncated JSON object stopped in an unknown location");
+                    }
+                    // fprintf(stderr, "Cutting back to last : for object key+value\n");
+                    str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
+                }
+            } else {
+                throw std::runtime_error("Cannot heal a truncated JSON object stopped in an unknown location");
+            }
+            // fprintf(stderr, "HEALED:\nSTRING <<<\n%s\n>>>\n\nmagic_cut: <<<\n%s\n>>>\n\n", str.c_str(), out.healing_marker.json_dump_marker.c_str());
+            out.json = json::parse(str);
+            it = temptative_end;
+            return true;
+        }
+        // TODO: handle unclosed top-level primitive if the stack was empty but we got an error (e.g. "tru", "\"", etc...)
+        // fprintf(stderr, "Closing: TODO\n");
+        return false;
+    }
+    out.json = json::parse(it, end);
+    it = end;
+    return true;
+}
package/cpp/json-partial.h ADDED
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "nlohmann/json.hpp"
+
+// Healing marker (empty if the JSON was fully parsed / wasn't healed).
+struct common_healing_marker {
+    // Raw marker.
+    std::string marker;
+
+    // Cutting the `common_json.json.dump()` string at the (only) occurrence of this marker should yield the original partial JSON string (modulo spaces / if it had the same dump format).
+    std::string json_dump_marker;
+};
+
+// Represents a parsed JSON object, with its optional healing marker (a JSON dump fragment that can be used to find the position of healing in the JSON dump string)
+struct common_json {
+    nlohmann::ordered_json json;
+
+    common_healing_marker healing_marker;
+};
+
+// Parse the JSON string, healing (closing) any partial JSON if `healing_marker` is not empty.
+//
+// Healing completes partial JSON strings by adding a (possibly modified) healing marker, then whatever is needed to close the JSON.
+// This allows to parse the resulting healed JSON string, yet be able to cut it again if needed at the healing marker.
+// (this is used when parsing JSON outputs from the models, then crafting partial JSONs for the partial tool calls in OAI format).
+//
+// For instance, parsing `{` with a healing marker `foo` will produce a healed JSON `{"foo":1}`, w/ json_dump_marker = `"foo"` (which can be used to break the JSON again).
+bool common_json_parse(
+    const std::string & input,
+    const std::string & healing_marker,
+    common_json & out);
+
+// Parse the JSON string (see overload above), but advancing an iterator to the end of the input when the (potentially partial) parsing succeeds.
+bool common_json_parse(
+    std::string::const_iterator & it,
+    const std::string::const_iterator & end,
+    const std::string & healing_marker,
+    common_json & out);
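A hedged usage sketch of the common_json_parse API declared above, following its own documentation: heal a truncated JSON fragment (e.g. a partially streamed tool call), then use json_dump_marker to cut the healed part back out of the dump. The fragment and marker strings below are made up for illustration.

#include "json-partial.h"

#include <cstdio>
#include <string>

static void partial_json_sketch() {
    // Truncated object, e.g. the prefix of a streamed tool-call argument.
    const std::string partial = "{\"name\": \"get_weather\", \"arguments\": {\"city\": \"Par";

    common_json out;
    if (common_json_parse(partial, /* healing_marker = */ "$MARKER$", out)) {
        const std::string dump = out.json.dump();
        // Everything from json_dump_marker onwards was synthesized to close the JSON.
        const auto pos = dump.find(out.healing_marker.json_dump_marker);
        const std::string recovered = (pos == std::string::npos) ? dump : dump.substr(0, pos);
        std::printf("healed: %s\nrecovered prefix: %s\n", dump.c_str(), recovered.c_str());
    }
}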
package/cpp/json-schema-to-grammar.cpp CHANGED
@@ -1,8 +1,9 @@
 #include "json-schema-to-grammar.h"
 #include "common.h"
 
+#include "nlohmann/json.hpp"
+
 #include <algorithm>
-#include <fstream>
 #include <map>
 #include <regex>
 #include <sstream>
@@ -40,49 +41,6 @@ static std::string build_repetition(const std::string & item_rule, int min_items
     return result;
 }
 
-/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */
-class string_view {
-    const std::string & _str;
-    const size_t _start;
-    const size_t _end;
-  public:
-    string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {}
-
-    size_t size() const {
-        return _end - _start;
-    }
-
-    size_t length() const {
-        return size();
-    }
-
-    operator std::string() const {
-        return str();
-    }
-
-    std::string str() const {
-        return _str.substr(_start, _end - _start);
-    }
-
-    string_view substr(size_t pos, size_t len = std::string::npos) const {
-        return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len);
-    }
-
-    char operator[](size_t pos) const {
-        auto index = _start + pos;
-        if (index >= _end) {
-            throw std::out_of_range("string_view index out of range");
-        }
-        return _str[_start + pos];
-    }
-
-    bool operator==(const string_view & other) const {
-        std::string this_str = *this;
-        std::string other_str = other;
-        return this_str == other_str;
-    }
-};
-
 static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
     auto has_min = min_value != std::numeric_limits<int>::min();
     auto has_max = max_value != std::numeric_limits<int>::max();
@@ -111,14 +69,14 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
         }
         out << "}";
     };
-    std::function<void(const string_view &, const string_view &)> uniform_range =
-        [&](const string_view & from, const string_view & to) {
+    std::function<void(const std::string_view &, const std::string_view &)> uniform_range =
+        [&](const std::string_view & from, const std::string_view & to) {
            size_t i = 0;
            while (i < from.length() && i < to.length() && from[i] == to[i]) {
                i++;
            }
            if (i > 0) {
-               out << "\"" << from.substr(0, i).str() << "\"";
+               out << "\"" << from.substr(0, i) << "\"";
            }
            if (i < from.length() && i < to.length()) {
                if (i > 0) {
package/cpp/json-schema-to-grammar.h CHANGED
@@ -1,9 +1,9 @@
 #pragma once
 
-#include "ggml.h"
-// Change JSON_ASSERT from assert() to LM_GGML_ASSERT:
-#define JSON_ASSERT LM_GGML_ASSERT
-#include "json.hpp"
+#include "nlohmann/json_fwd.hpp"
+
+#include <functional>
+#include <string>
 
 std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
     bool force_gbnf = false);