cui-llama.rn 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. package/LICENSE +20 -20
  2. package/README.md +345 -319
  3. package/android/build.gradle +116 -116
  4. package/android/gradle.properties +5 -5
  5. package/android/src/main/AndroidManifest.xml +4 -4
  6. package/android/src/main/CMakeLists.txt +129 -124
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +648 -645
  8. package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
  10. package/android/src/main/jni-utils.h +100 -100
  11. package/android/src/main/jni.cpp +1279 -1263
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  20. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
  21. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
  22. package/cpp/LICENSE +21 -0
  23. package/cpp/README.md +4 -4
  24. package/cpp/chat.cpp +1 -1
  25. package/cpp/common.cpp +17 -2
  26. package/cpp/common.h +7 -3
  27. package/cpp/ggml-alloc.c +4 -1
  28. package/cpp/ggml-cpp.h +1 -1
  29. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  30. package/cpp/ggml-cpu/amx/amx.h +8 -0
  31. package/cpp/ggml-cpu/amx/common.h +91 -0
  32. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  33. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  34. package/cpp/{binary-ops.h → ggml-cpu/binary-ops.h} +1 -1
  35. package/cpp/ggml-cpu/common.h +72 -0
  36. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -101
  37. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +109 -42
  38. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +3 -0
  39. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +246 -160
  40. package/cpp/{ops.h → ggml-cpu/ops.h} +2 -20
  41. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  42. package/cpp/{simd-mappings.h → ggml-cpu/simd-mappings.h} +7 -3
  43. package/cpp/{unary-ops.h → ggml-cpu/unary-ops.h} +1 -1
  44. package/cpp/ggml-cpu.h +5 -0
  45. package/cpp/ggml-impl.h +16 -9
  46. package/cpp/ggml-llama-sim.metallib +0 -0
  47. package/cpp/ggml-llama.metallib +0 -0
  48. package/cpp/ggml-metal-impl.h +597 -597
  49. package/cpp/ggml-metal.m +496 -47
  50. package/cpp/ggml.c +134 -244
  51. package/cpp/ggml.h +62 -95
  52. package/cpp/json-schema-to-grammar.cpp +3 -0
  53. package/cpp/llama-arch.cpp +46 -17
  54. package/cpp/llama-arch.h +9 -0
  55. package/cpp/llama-batch.cpp +5 -1
  56. package/cpp/llama-batch.h +2 -1
  57. package/cpp/llama-chat.cpp +31 -10
  58. package/cpp/llama-chat.h +3 -2
  59. package/cpp/llama-context.cpp +104 -489
  60. package/cpp/llama-context.h +14 -30
  61. package/cpp/llama-graph.cpp +69 -62
  62. package/cpp/llama-graph.h +21 -18
  63. package/cpp/llama-hparams.h +5 -0
  64. package/cpp/llama-kv-cache.cpp +1497 -391
  65. package/cpp/llama-kv-cache.h +272 -80
  66. package/cpp/llama-memory.h +11 -1
  67. package/cpp/llama-model.cpp +502 -176
  68. package/cpp/llama-model.h +13 -3
  69. package/cpp/llama-sampling.cpp +2 -1
  70. package/cpp/llama-vocab.cpp +8 -1
  71. package/cpp/llama.h +14 -11
  72. package/cpp/rn-llama.cpp +721 -873
  73. package/cpp/rn-llama.h +134 -138
  74. package/cpp/sampling.h +107 -107
  75. package/cpp/unicode-data.cpp +7034 -7034
  76. package/cpp/unicode-data.h +20 -20
  77. package/cpp/unicode.cpp +849 -849
  78. package/cpp/unicode.h +66 -66
  79. package/ios/CMakeLists.txt +119 -108
  80. package/ios/RNLlama.h +13 -7
  81. package/ios/RNLlama.mm +423 -405
  82. package/ios/RNLlamaContext.h +57 -57
  83. package/ios/RNLlamaContext.mm +833 -835
  84. package/ios/rnllama.xcframework/Info.plist +74 -74
  85. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
  86. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +681 -0
  87. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  88. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  89. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  90. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  91. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  92. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  93. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +143 -0
  94. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +601 -0
  95. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  96. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  97. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2189 -0
  101. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
  102. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +437 -0
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +89 -0
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +57 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +249 -0
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +595 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +161 -0
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +31 -0
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +419 -0
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1437 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +134 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +681 -0
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +143 -0
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +601 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2189 -0
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +437 -0
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +89 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +57 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +249 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +595 -0
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +161 -0
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +31 -0
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +419 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1437 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +134 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  184. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +681 -0
  188. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  189. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  190. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +143 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +601 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2189 -0
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +437 -0
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +89 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +57 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +249 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +595 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +161 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  218. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +31 -0
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +419 -0
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1437 -0
  225. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
  226. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  227. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  228. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +134 -0
  229. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
  230. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
  231. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  232. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
  233. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  234. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  235. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +681 -0
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +143 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +601 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2189 -0
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +437 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +89 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +57 -0
  259. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +249 -0
  260. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +595 -0
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +161 -0
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +31 -0
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  271. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +419 -0
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  274. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1437 -0
  275. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  276. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  277. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  278. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +134 -0
  279. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  280. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  281. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  282. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  283. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  284. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  285. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  286. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  287. package/jest/mock.js +203 -203
  288. package/lib/commonjs/NativeRNLlama.js +1 -2
  289. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  290. package/lib/commonjs/chat.js.map +1 -1
  291. package/lib/commonjs/grammar.js +12 -31
  292. package/lib/commonjs/grammar.js.map +1 -1
  293. package/lib/commonjs/index.js +47 -47
  294. package/lib/commonjs/index.js.map +1 -1
  295. package/lib/commonjs/package.json +1 -0
  296. package/lib/module/NativeRNLlama.js +2 -0
  297. package/lib/module/NativeRNLlama.js.map +1 -1
  298. package/lib/module/chat.js +2 -0
  299. package/lib/module/chat.js.map +1 -1
  300. package/lib/module/grammar.js +14 -31
  301. package/lib/module/grammar.js.map +1 -1
  302. package/lib/module/index.js +47 -45
  303. package/lib/module/index.js.map +1 -1
  304. package/lib/module/package.json +1 -0
  305. package/lib/typescript/NativeRNLlama.d.ts +10 -4
  306. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  307. package/lib/typescript/index.d.ts.map +1 -1
  308. package/llama-rn.podspec +48 -48
  309. package/package.json +233 -233
  310. package/src/NativeRNLlama.ts +431 -426
  311. package/src/chat.ts +44 -44
  312. package/src/grammar.ts +854 -854
  313. package/src/index.ts +495 -487
  314. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  315. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  316. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  317. /package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +0 -0
  318. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  319. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  320. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  321. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  322. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
  323. /package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -0
  324. /package/cpp/{vec.h → ggml-cpu/vec.h} +0 -0
package/ios/RNLlama.mm CHANGED
@@ -1,405 +1,423 @@
 #import "RNLlama.h"
 #import "RNLlamaContext.h"
 
 #ifdef RCT_NEW_ARCH_ENABLED
 #import "RNLlamaSpec.h"
 #endif
 
 @implementation RNLlama
 
 NSMutableDictionary *llamaContexts;
 double llamaContextLimit = -1;
 dispatch_queue_t llamaDQueue;
 
 RCT_EXPORT_MODULE()
 
 RCT_EXPORT_METHOD(toggleNativeLog:(BOOL)enabled) {
   void (^onEmitLog)(NSString *level, NSString *text) = nil;
   if (enabled) {
     onEmitLog = ^(NSString *level, NSString *text) {
       [self sendEventWithName:@"@RNLlama_onNativeLog" body:@{ @"level": level, @"text": text }];
     };
   }
   [RNLlamaContext toggleNativeLog:enabled onEmitLog:onEmitLog];
 }
 
 RCT_EXPORT_METHOD(setContextLimit:(double)limit
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   llamaContextLimit = limit;
   resolve(nil);
 }
 
 RCT_EXPORT_METHOD(modelInfo:(NSString *)path
                   withSkip:(NSArray *)skip
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   resolve([RNLlamaContext modelInfo:path skip:skip]);
 }
 
 RCT_EXPORT_METHOD(initContext:(double)contextId
                   withContextParams:(NSDictionary *)contextParams
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   NSNumber *contextIdNumber = [NSNumber numberWithDouble:contextId];
   if (llamaContexts[contextIdNumber] != nil) {
     reject(@"llama_error", @"Context already exists", nil);
     return;
   }
 
   if (llamaDQueue == nil) {
     llamaDQueue = dispatch_queue_create("com.rnllama", DISPATCH_QUEUE_SERIAL);
   }
 
   if (llamaContexts == nil) {
     llamaContexts = [[NSMutableDictionary alloc] init];
   }
 
   if (llamaContextLimit > -1 && [llamaContexts count] >= llamaContextLimit) {
     reject(@"llama_error", @"Context limit reached", nil);
     return;
   }
 
   @try {
     RNLlamaContext *context = [RNLlamaContext initWithParams:contextParams onProgress:^(unsigned int progress) {
       dispatch_async(dispatch_get_main_queue(), ^{
         [self sendEventWithName:@"@RNLlama_onInitContextProgress" body:@{ @"contextId": @(contextId), @"progress": @(progress) }];
       });
     }];
     if (![context isModelLoaded]) {
       reject(@"llama_cpp_error", @"Failed to load the model", nil);
       return;
     }
 
     [llamaContexts setObject:context forKey:contextIdNumber];
 
     resolve(@{
       @"gpu": @([context isMetalEnabled]),
       @"reasonNoGPU": [context reasonNoMetal],
       @"model": [context modelInfo],
     });
   } @catch (NSException *exception) {
     reject(@"llama_cpp_error", exception.reason, nil);
   }
 }
 
 RCT_EXPORT_METHOD(getFormattedChat:(double)contextId
                   withMessages:(NSString *)messages
                   withTemplate:(NSString *)chatTemplate
                   withParams:(NSDictionary *)params
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   try {
     if ([params[@"jinja"] boolValue]) {
       NSString *jsonSchema = params[@"json_schema"];
       NSString *tools = params[@"tools"];
       bool parallelToolCalls = [params[@"parallel_tool_calls"] boolValue];
       NSString *toolChoice = params[@"tool_choice"];
       resolve([context getFormattedChatWithJinja:messages withChatTemplate:chatTemplate withJsonSchema:jsonSchema withTools:tools withParallelToolCalls:parallelToolCalls withToolChoice:toolChoice]);
     } else {
       resolve([context getFormattedChat:messages withChatTemplate:chatTemplate]);
     }
+  } catch (const nlohmann::json_abi_v3_11_3::detail::parse_error& e) {
+    NSString *errorMessage = [NSString stringWithUTF8String:e.what()];
+    reject(@"llama_error", [NSString stringWithFormat:@"JSON parse error in getFormattedChat: %@", errorMessage], nil);
   } catch (const std::exception& e) { // catch cpp exceptions
     reject(@"llama_error", [NSString stringWithUTF8String:e.what()], nil);
+  } catch (...) {
+    reject(@"llama_error", @"Unknown error in getFormattedChat", nil);
   }
 }
 
 RCT_EXPORT_METHOD(loadSession:(double)contextId
                   withFilePath:(NSString *)filePath
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   if ([context isPredicting]) {
     reject(@"llama_error", @"Context is busy", nil);
     return;
   }
   dispatch_async(llamaDQueue, ^{
     @try {
       @autoreleasepool {
         resolve([context loadSession:filePath]);
       }
     } @catch (NSException *exception) {
       reject(@"llama_cpp_error", exception.reason, nil);
     }
   });
 }
 
 RCT_EXPORT_METHOD(saveSession:(double)contextId
                   withFilePath:(NSString *)filePath
                   withSize:(double)size
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   if ([context isPredicting]) {
     reject(@"llama_error", @"Context is busy", nil);
     return;
   }
   dispatch_async(llamaDQueue, ^{
     @try {
       @autoreleasepool {
         int count = [context saveSession:filePath size:(int)size];
         resolve(@(count));
       }
     } @catch (NSException *exception) {
       reject(@"llama_cpp_error", exception.reason, nil);
     }
   });
 }
 
 - (NSArray *)supportedEvents {
   return@[
     @"@RNLlama_onInitContextProgress",
     @"@RNLlama_onToken",
     @"@RNLlama_onNativeLog",
   ];
 }
 
 RCT_EXPORT_METHOD(completion:(double)contextId
                   withCompletionParams:(NSDictionary *)completionParams
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   if ([context isPredicting]) {
     reject(@"llama_error", @"Context is busy", nil);
     return;
   }
   dispatch_async(llamaDQueue, ^{
     @try {
       @autoreleasepool {
         NSDictionary* completionResult = [context completion:completionParams
           onToken:^(NSMutableDictionary *tokenResult) {
             if (![completionParams[@"emit_partial_completion"] boolValue]) return;
             dispatch_async(dispatch_get_main_queue(), ^{
               [self sendEventWithName:@"@RNLlama_onToken"
                 body:@{
                   @"contextId": [NSNumber numberWithDouble:contextId],
                   @"tokenResult": tokenResult
                 }
               ];
               [tokenResult release];
             });
           }
         ];
         resolve(completionResult);
       }
     } @catch (NSException *exception) {
       reject(@"llama_cpp_error", exception.reason, nil);
       [context stopCompletion];
     }
   });
 
 }
 
 RCT_EXPORT_METHOD(stopCompletion:(double)contextId
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   [context stopCompletion];
   resolve(nil);
 }
 
-RCT_EXPORT_METHOD(tokenize:(double)contextId
+RCT_EXPORT_METHOD(tokenizeASync:(double)contextId
                   text:(NSString *)text
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   NSMutableArray *tokens = [context tokenize:text];
   resolve(@{ @"tokens": tokens });
   [tokens release];
 }
 
+RCT_EXPORT_BLOCKING_SYNCHRONOUS_METHOD(tokenizeSync:(double)contextId
+                                       text:(NSString *)text)
+{
+  RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
+  if (context == nil) {
+    return @{ @"error": @"Context not found" };
+  }
+  NSMutableArray *tokens = [context tokenize:text];
+  NSDictionary *result = @{ @"tokens": tokens };
+  [tokens release];
+  return result;
+}
+
 RCT_EXPORT_METHOD(detokenize:(double)contextId
                   tokens:(NSArray *)tokens
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   resolve([context detokenize:tokens]);
 }
 
 RCT_EXPORT_METHOD(embedding:(double)contextId
                   text:(NSString *)text
                   params:(NSDictionary *)params
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   @try {
     NSDictionary *embedding = [context embedding:text params:params];
     resolve(embedding);
   } @catch (NSException *exception) {
     reject(@"llama_cpp_error", exception.reason, nil);
   }
 }
 
 RCT_EXPORT_METHOD(bench:(double)contextId
                   pp:(int)pp
                   tg:(int)tg
                   pl:(int)pl
                   nr:(int)nr
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   @try {
     NSString *benchResults = [context bench:pp tg:tg pl:pl nr:nr];
     resolve(benchResults);
   } @catch (NSException *exception) {
     reject(@"llama_cpp_error", exception.reason, nil);
   }
 }
 
 RCT_EXPORT_METHOD(applyLoraAdapters:(double)contextId
                   withLoraAdapters:(NSArray *)loraAdapters
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   if ([context isPredicting]) {
     reject(@"llama_error", @"Context is busy", nil);
     return;
   }
   [context applyLoraAdapters:loraAdapters];
   resolve(nil);
 }
 
 RCT_EXPORT_METHOD(removeLoraAdapters:(double)contextId
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   if ([context isPredicting]) {
     reject(@"llama_error", @"Context is busy", nil);
     return;
   }
   [context removeLoraAdapters];
   resolve(nil);
 }
 
 RCT_EXPORT_METHOD(getLoadedLoraAdapters:(double)contextId
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   resolve([context getLoadedLoraAdapters]);
 }
 
 RCT_EXPORT_METHOD(releaseContext:(double)contextId
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
   if (context == nil) {
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
   if (![context isModelLoaded]) {
     [context interruptLoad];
   }
   [context stopCompletion];
   dispatch_barrier_sync(llamaDQueue, ^{});
   [context invalidate];
   [llamaContexts removeObjectForKey:[NSNumber numberWithDouble:contextId]];
   resolve(nil);
 }
 
 RCT_EXPORT_METHOD(releaseAllContexts:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
   [self invalidate];
   resolve(nil);
 }
 
 
 - (void)invalidate {
   if (llamaContexts == nil) {
     return;
   }
 
   for (NSNumber *contextId in llamaContexts) {
     RNLlamaContext *context = llamaContexts[contextId];
     [context stopCompletion];
     dispatch_barrier_sync(llamaDQueue, ^{});
     [context invalidate];
   }
 
   [llamaContexts removeAllObjects];
   [llamaContexts release];
   llamaContexts = nil;
 
   if (llamaDQueue != nil) {
     dispatch_release(llamaDQueue);
     llamaDQueue = nil;
   }
 
   [super invalidate];
 }
 
 // Don't compile this code when we build for the old architecture.
 #ifdef RCT_NEW_ARCH_ENABLED
 - (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
     (const facebook::react::ObjCTurboModule::InitParams &)params
 {
   return std::make_shared<facebook::react::NativeRNLlamaSpecJSI>(params);
 }
 #endif
 
 @end
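
The visible API change in this file is twofold: getFormattedChat now catches nlohmann JSON parse errors and unknown C++ exceptions separately instead of relying on a single std::exception handler, and the tokenizer entry point is split into a promise-based tokenizeASync plus a blocking tokenizeSync exported via RCT_EXPORT_BLOCKING_SYNCHRONOUS_METHOD. Below is a minimal sketch of how the two tokenizer paths might be called from the JS side; initLlama and context.tokenize follow the package's documented usage, while the tokenizeSync wrapper name and its result shape are assumptions inferred from the native exports above (the real bindings live in package/src/index.ts).

// Hypothetical usage sketch, TypeScript
import { initLlama } from 'cui-llama.rn'

async function tokenizeDemo(): Promise<void> {
  const context = await initLlama({
    model: '/path/to/model.gguf', // any local GGUF model
    n_ctx: 512,
  })

  // Promise-based path, backed by the native tokenizeASync export.
  const { tokens } = await context.tokenize('Hello world')
  console.log('async tokens:', tokens)

  // Blocking path, backed by RCT_EXPORT_BLOCKING_SYNCHRONOUS_METHOD.
  // Note the native method returns { error: ... } rather than rejecting
  // when the context id is unknown, so callers should check for it.
  const syncResult = context.tokenizeSync('Hello world') as
    | { tokens: number[] }
    | { error: string }
  if ('error' in syncResult) throw new Error(syncResult.error)
  console.log('sync tokens:', syncResult.tokens)

  await context.release()
}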