cui-llama.rn 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. package/LICENSE +20 -20
  2. package/README.md +345 -319
  3. package/android/build.gradle +116 -116
  4. package/android/gradle.properties +5 -5
  5. package/android/src/main/AndroidManifest.xml +4 -4
  6. package/android/src/main/CMakeLists.txt +129 -124
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +648 -645
  8. package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
  10. package/android/src/main/jni-utils.h +100 -100
  11. package/android/src/main/jni.cpp +1279 -1263
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  20. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
  21. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
  22. package/cpp/LICENSE +21 -0
  23. package/cpp/README.md +4 -4
  24. package/cpp/chat.cpp +1 -1
  25. package/cpp/common.cpp +17 -2
  26. package/cpp/common.h +7 -3
  27. package/cpp/ggml-alloc.c +4 -1
  28. package/cpp/ggml-cpp.h +1 -1
  29. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  30. package/cpp/ggml-cpu/amx/amx.h +8 -0
  31. package/cpp/ggml-cpu/amx/common.h +91 -0
  32. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  33. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  34. package/cpp/{binary-ops.h → ggml-cpu/binary-ops.h} +1 -1
  35. package/cpp/ggml-cpu/common.h +72 -0
  36. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -101
  37. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +109 -42
  38. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +3 -0
  39. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +246 -160
  40. package/cpp/{ops.h → ggml-cpu/ops.h} +2 -20
  41. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  42. package/cpp/{simd-mappings.h → ggml-cpu/simd-mappings.h} +7 -3
  43. package/cpp/{unary-ops.h → ggml-cpu/unary-ops.h} +1 -1
  44. package/cpp/ggml-cpu.h +5 -0
  45. package/cpp/ggml-impl.h +16 -9
  46. package/cpp/ggml-llama-sim.metallib +0 -0
  47. package/cpp/ggml-llama.metallib +0 -0
  48. package/cpp/ggml-metal-impl.h +597 -597
  49. package/cpp/ggml-metal.m +496 -47
  50. package/cpp/ggml.c +134 -244
  51. package/cpp/ggml.h +62 -95
  52. package/cpp/json-schema-to-grammar.cpp +3 -0
  53. package/cpp/llama-arch.cpp +46 -17
  54. package/cpp/llama-arch.h +9 -0
  55. package/cpp/llama-batch.cpp +5 -1
  56. package/cpp/llama-batch.h +2 -1
  57. package/cpp/llama-chat.cpp +31 -10
  58. package/cpp/llama-chat.h +3 -2
  59. package/cpp/llama-context.cpp +104 -489
  60. package/cpp/llama-context.h +14 -30
  61. package/cpp/llama-graph.cpp +69 -62
  62. package/cpp/llama-graph.h +21 -18
  63. package/cpp/llama-hparams.h +5 -0
  64. package/cpp/llama-kv-cache.cpp +1497 -391
  65. package/cpp/llama-kv-cache.h +272 -80
  66. package/cpp/llama-memory.h +11 -1
  67. package/cpp/llama-model.cpp +502 -176
  68. package/cpp/llama-model.h +13 -3
  69. package/cpp/llama-sampling.cpp +2 -1
  70. package/cpp/llama-vocab.cpp +8 -1
  71. package/cpp/llama.h +14 -11
  72. package/cpp/rn-llama.cpp +721 -873
  73. package/cpp/rn-llama.h +134 -138
  74. package/cpp/sampling.h +107 -107
  75. package/cpp/unicode-data.cpp +7034 -7034
  76. package/cpp/unicode-data.h +20 -20
  77. package/cpp/unicode.cpp +849 -849
  78. package/cpp/unicode.h +66 -66
  79. package/ios/CMakeLists.txt +119 -108
  80. package/ios/RNLlama.h +13 -7
  81. package/ios/RNLlama.mm +423 -405
  82. package/ios/RNLlamaContext.h +57 -57
  83. package/ios/RNLlamaContext.mm +833 -835
  84. package/ios/rnllama.xcframework/Info.plist +74 -74
  85. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
  86. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +681 -0
  87. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  88. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  89. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  90. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  91. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  92. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  93. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +143 -0
  94. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +601 -0
  95. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  96. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  97. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2189 -0
  101. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
  102. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +437 -0
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +89 -0
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +57 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +249 -0
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +595 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +161 -0
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +31 -0
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +419 -0
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1437 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +134 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +681 -0
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +143 -0
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +601 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2189 -0
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +437 -0
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +89 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +57 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +249 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +595 -0
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +161 -0
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +31 -0
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +419 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1437 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +134 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  184. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +681 -0
  188. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  189. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  190. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +143 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +601 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2189 -0
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +437 -0
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +89 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +57 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +249 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +595 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +161 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  218. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +31 -0
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +419 -0
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1437 -0
  225. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
  226. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  227. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  228. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +134 -0
  229. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
  230. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
  231. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  232. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
  233. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  234. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  235. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +681 -0
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +143 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +601 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2189 -0
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +437 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +89 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +57 -0
  259. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +249 -0
  260. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +595 -0
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +161 -0
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +31 -0
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  271. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +419 -0
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  274. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1437 -0
  275. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  276. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  277. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  278. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +134 -0
  279. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  280. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  281. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  282. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  283. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  284. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  285. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  286. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  287. package/jest/mock.js +203 -203
  288. package/lib/commonjs/NativeRNLlama.js +1 -2
  289. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  290. package/lib/commonjs/chat.js.map +1 -1
  291. package/lib/commonjs/grammar.js +12 -31
  292. package/lib/commonjs/grammar.js.map +1 -1
  293. package/lib/commonjs/index.js +47 -47
  294. package/lib/commonjs/index.js.map +1 -1
  295. package/lib/commonjs/package.json +1 -0
  296. package/lib/module/NativeRNLlama.js +2 -0
  297. package/lib/module/NativeRNLlama.js.map +1 -1
  298. package/lib/module/chat.js +2 -0
  299. package/lib/module/chat.js.map +1 -1
  300. package/lib/module/grammar.js +14 -31
  301. package/lib/module/grammar.js.map +1 -1
  302. package/lib/module/index.js +47 -45
  303. package/lib/module/index.js.map +1 -1
  304. package/lib/module/package.json +1 -0
  305. package/lib/typescript/NativeRNLlama.d.ts +10 -4
  306. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  307. package/lib/typescript/index.d.ts.map +1 -1
  308. package/llama-rn.podspec +48 -48
  309. package/package.json +233 -233
  310. package/src/NativeRNLlama.ts +431 -426
  311. package/src/chat.ts +44 -44
  312. package/src/grammar.ts +854 -854
  313. package/src/index.ts +495 -487
  314. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  315. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  316. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  317. /package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +0 -0
  318. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  319. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  320. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  321. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  322. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
  323. /package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -0
  324. /package/cpp/{vec.h → ggml-cpu/vec.h} +0 -0
package/README.md CHANGED
@@ -1,319 +1,345 @@
- # cui-llama.rn
-
- This is a fork of [llama.rn](https://github.com/mybigday/llama.rn) meant for [ChatterUI](https://github.com/Vali-98/ChatterUI).
-
- This fork exists to update llama.cpp more frequently and to add features useful to ChatterUI.
-
- The following features have been added for Android:
-
- - Stopping prompt processing between batches, vital for mobile devices with very slow prompt processing
- - `vocab_only` mode: utilize the llama.cpp tokenizer
- - `tokenizeSync`: a synchronous tokenizer function
- - Context Shift taken from [kobold.cpp](https://github.com/LostRuins/koboldcpp)
- - Retrieving CPU features to check for i8mm and dotprod flags
-
- There is no iOS implementation for these features.
-
- Original repo README.md below.
-
- # llama.rn
-
- [![Actions Status](https://github.com/mybigday/llama.rn/workflows/CI/badge.svg)](https://github.com/mybigday/llama.rn/actions)
- [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
- [![npm](https://img.shields.io/npm/v/llama.rn.svg)](https://www.npmjs.com/package/llama.rn/)
-
- React Native binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).
-
- [llama.cpp](https://github.com/ggerganov/llama.cpp): Inference of the [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
-
- ## Installation
-
- ```sh
- npm install llama.rn
- ```
-
- #### iOS
-
- Please run `npx pod-install` again.
-
- By default, `llama.rn` will use the pre-built `rnllama.xcframework` for iOS. If you want to build from source, please set `RNLLAMA_BUILD_FROM_SOURCE` to `1` in your Podfile.
-
- #### Android
-
- Add a ProGuard rule if it is enabled in your project (android/app/proguard-rules.pro):
-
- ```proguard
- # llama.rn
- -keep class com.rnllama.** { *; }
- ```
-
- By default, `llama.rn` will use pre-built libraries for Android. If you want to build from source, please set `rnllamaBuildFromSource` to `true` in `android/gradle.properties`.
-
- ## Obtain the model
-
- You can search HuggingFace for available models (keyword: [`GGUF`](https://huggingface.co/search/full-text?q=GGUF&type=model)).
-
- To get a GGUF model or quantize one manually, see the [`Prepare and Quantize`](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#prepare-and-quantize) section in llama.cpp.
-
- ## Usage
-
- Load model info only:
-
- ```js
- import { loadLlamaModelInfo } from 'llama.rn'
-
- const modelPath = 'file://<path to gguf model>'
- console.log('Model Info:', await loadLlamaModelInfo(modelPath))
- ```
-
- Initialize a Llama context & do completion:
-
- ```js
- import { initLlama } from 'llama.rn'
-
- // Initialize a Llama context with the model (may take a while)
- const context = await initLlama({
-   model: modelPath,
-   use_mlock: true,
-   n_ctx: 2048,
-   n_gpu_layers: 99, // number of layers to store in VRAM (Currently only for iOS)
-   // embedding: true, // use embedding
- })
-
- const stopWords = ['</s>', '<|end|>', '<|eot_id|>', '<|end_of_text|>', '<|im_end|>', '<|EOT|>', '<|END_OF_TURN_TOKEN|>', '<|end_of_turn|>', '<|endoftext|>']
-
- // Do chat completion
- const msgResult = await context.completion(
-   {
-     messages: [
-       {
-         role: 'system',
-         content: 'This is a conversation between user and assistant, a friendly chatbot.',
-       },
-       {
-         role: 'user',
-         content: 'Hello!',
-       },
-     ],
-     n_predict: 100,
-     stop: stopWords,
-     // ...other params
-   },
-   (data) => {
-     // This is a partial completion callback
-     const { token } = data
-   },
- )
- console.log('Result:', msgResult.text)
- console.log('Timings:', msgResult.timings)
-
- // Or do text completion
- const textResult = await context.completion(
-   {
-     prompt:
-       'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\nUser: Hello!\nLlama:',
-     n_predict: 100,
-     stop: [...stopWords, 'Llama:', 'User:'],
-     // ...other params
-   },
-   (data) => {
-     // This is a partial completion callback
-     const { token } = data
-   },
- )
- console.log('Result:', textResult.text)
- console.log('Timings:', textResult.timings)
- ```
-
- The binding’s design is inspired by the [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp, so you can map its API to LlamaContext:
-
- - `/completion` and `/chat/completions`: `context.completion(params, partialCompletionCallback)`
- - `/tokenize`: `context.tokenize(content)`
- - `/detokenize`: `context.detokenize(tokens)`
- - `/embedding`: `context.embedding(content)`
- - Other methods
-   - `context.loadSession(path)`
-   - `context.saveSession(path)`
-   - `context.stopCompletion()`
-   - `context.release()`
-
- Please visit the [Documentation](docs/API) for more details.
-
- You can also visit the [example](example) to see how to use it.
-
- ## Tool Calling
-
- `llama.rn` has universal tool call support by using [minja](https://github.com/google/minja) (as the Jinja template parser) and [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/master/common/chat.cpp) in llama.cpp.
-
- Example:
-
- ```js
- import { initLlama } from 'llama.rn'
-
- const context = await initLlama({
-   // ...params
- })
-
- const { text, tool_calls } = await context.completion({
-   // ...params
-   jinja: true, // Enable Jinja template parser
-   tool_choice: 'auto',
-   tools: [
-     {
-       type: 'function',
-       function: {
-         name: 'ipython',
-         description:
-           'Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.',
-         parameters: {
-           type: 'object',
-           properties: {
-             code: {
-               type: 'string',
-               description: 'The code to run in the ipython interpreter.',
-             },
-           },
-           required: ['code'],
-         },
-       },
-     },
-   ],
-   messages: [
-     {
-       role: 'system',
-       content: 'You are a helpful assistant that can answer questions and help with tasks.',
-     },
-     {
-       role: 'user',
-       content: 'Test',
-     },
-   ],
- })
- console.log('Result:', text)
- // If tool_calls is not empty, it means the model has called the tool
- if (tool_calls) console.log('Tool Calls:', tool_calls)
- ```
-
- You can check [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/6eecde3cc8fda44da7794042e3668de4af3c32c6/common/chat.cpp#L7-L23) for the models that have native tool calling support; for other models it will fall back to the `GENERIC` tool call type.
-
- The generic tool call always outputs a JSON object; the output will look like `{"response": "..."}` when the model decides not to use a tool call.
-
- ## Grammar Sampling
-
- GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis.
-
- See the [GBNF Guide](https://github.com/ggerganov/llama.cpp/tree/master/grammars) for more details.
-
- `llama.rn` provides a built-in function to convert a JSON schema to GBNF:
-
- Example GBNF grammar:
- ```bnf
- root ::= object
- value ::= object | array | string | number | ("true" | "false" | "null") ws
-
- object ::=
-   "{" ws (
-     string ":" ws value
-     ("," ws string ":" ws value)*
-   )? "}" ws
-
- array ::=
-   "[" ws (
-     value
-     ("," ws value)*
-   )? "]" ws
-
- string ::=
-   "\"" (
-     [^"\\\x7F\x00-\x1F] |
-     "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
-   )* "\"" ws
-
- number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws
-
- # Optional space: by convention, applied in this grammar after literal chars when allowed
- ws ::= | " " | "\n" [ \t]{0,20}
- ```
-
- ```js
- import { initLlama } from 'llama.rn'
-
- const gbnf = '...'
-
- const context = await initLlama({
-   // ...params
-   grammar: gbnf,
- })
-
- const { text } = await context.completion({
-   // ...params
-   messages: [
-     {
-       role: 'system',
-       content: 'You are a helpful assistant that can answer questions and help with tasks.',
-     },
-     {
-       role: 'user',
-       content: 'Test',
-     },
-   ],
- })
- console.log('Result:', text)
- ```
-
- This is also how `json_schema` in `response_format` works during completion: the JSON schema is converted to a GBNF grammar.
-
- ## Mock `llama.rn`
-
- We provide a mock version of `llama.rn` for testing purposes that you can use with Jest:
-
- ```js
- jest.mock('llama.rn', () => require('llama.rn/jest/mock'))
- ```
-
- ## NOTE
-
- iOS:
-
- - Enabling the [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) capability is recommended for your iOS project.
- - Metal:
-   - In our tests, some devices were unable to use Metal (GPU) because llama.cpp uses SIMD-scoped operations. You can check whether your device is supported in the [Metal feature set tables](https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf); an Apple7 GPU is the minimum requirement.
-   - Metal is also not supported in the iOS simulator due to [this limitation](https://developer.apple.com/documentation/metal/developing_metal_apps_that_run_in_simulator#3241609), as we use more than 14 constant buffers.
-
- Android:
-
- - Currently only the arm64-v8a / x86_64 platforms are supported; you can't initialize a context on other platforms. 64-bit platforms are recommended because they can allocate more memory for the model.
- - No GPU backend has been integrated yet.
-
- ## Contributing
-
- See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the repository and the development workflow.
-
- ## Apps using `llama.rn`
-
- - [BRICKS](https://bricks.tools): Our product for building interactive signage in a simple way. We provide LLM functions as Generator LLM/Assistant.
- - [ChatterUI](https://github.com/Vali-98/ChatterUI): A simple frontend for LLMs built in React Native.
- - [PocketPal AI](https://github.com/a-ghorbani/pocketpal-ai): An app that brings language models directly to your phone.
-
- ## Node.js binding
-
- - [llama.node](https://github.com/mybigday/llama.node): Another Node.js binding of `llama.cpp` with the same API as `llama.rn`.
-
- ## License
-
- MIT
-
- ---
-
- Made with [create-react-native-library](https://github.com/callstack/react-native-builder-bob)
-
- ---
-
- <p align="center">
-   <a href="https://bricks.tools">
-     <img width="90px" src="https://avatars.githubusercontent.com/u/17320237?s=200&v=4">
-   </a>
-   <p align="center">
-     Built and maintained by <a href="https://bricks.tools">BRICKS</a>.
-   </p>
- </p>
+ # cui-llama.rn
+
+ This is a fork of [llama.rn](https://github.com/mybigday/llama.rn) meant for [ChatterUI](https://github.com/Vali-98/ChatterUI).
+
+ This fork exists to update llama.cpp more frequently and to add features useful to ChatterUI.
+
+ The following features have been added for Android (a usage sketch follows the list):
+
+ - Stopping prompt processing between batches, vital for mobile devices with very slow prompt processing
+ - `vocab_only` mode: utilize the llama.cpp tokenizer
+ - `tokenizeSync`: a synchronous tokenizer function
+ - Context Shift taken from [kobold.cpp](https://github.com/LostRuins/koboldcpp)
+ - Retrieving CPU features to check for i8mm and dotprod flags
+
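As a rough sketch of how the tokenizer-only features could combine, assuming the fork keeps the upstream `initLlama` API, that `vocab_only` is an init parameter, and that `tokenizeSync` is a context method returning the same `{ tokens }` shape as the async `tokenize`:

```js
import { initLlama } from 'cui-llama.rn'

// vocab_only skips loading the model weights, so only the tokenizer is usable
const context = await initLlama({
  model: 'file://<path to gguf model>',
  vocab_only: true,
})

// Assumed to mirror `await context.tokenize(text)`, returning synchronously
const { tokens } = context.tokenizeSync('Hello, world!')
console.log('Token count:', tokens.length)
```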
+ Original repo README.md below.
+
+ # llama.rn
+
+ [![Actions Status](https://github.com/mybigday/llama.rn/workflows/CI/badge.svg)](https://github.com/mybigday/llama.rn/actions)
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+ [![npm](https://img.shields.io/npm/v/llama.rn.svg)](https://www.npmjs.com/package/llama.rn/)
+
+ React Native binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).
+
+ [llama.cpp](https://github.com/ggerganov/llama.cpp): Inference of the [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
+
+ ## Installation
+
+ ```sh
+ npm install llama.rn
+ ```
+
+ #### iOS
+
+ Please run `npx pod-install` again.
+
+ By default, `llama.rn` will use the pre-built `rnllama.xcframework` for iOS. If you want to build from source, please set `RNLLAMA_BUILD_FROM_SOURCE` to `1` in your Podfile.
+
+ #### Android
+
+ Add a ProGuard rule if it is enabled in your project (android/app/proguard-rules.pro):
+
+ ```proguard
+ # llama.rn
+ -keep class com.rnllama.** { *; }
+ ```
+
+ By default, `llama.rn` will use pre-built libraries for Android. If you want to build from source, please set `rnllamaBuildFromSource` to `true` in `android/gradle.properties`, as shown below.
+
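On Android that opt-in is a single property (the file and property name come straight from the paragraph above):

```properties
# android/gradle.properties
rnllamaBuildFromSource=true
```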
+ ## Obtain the model
+
+ You can search HuggingFace for available models (keyword: [`GGUF`](https://huggingface.co/search/full-text?q=GGUF&type=model)).
+
+ To get a GGUF model or quantize one manually, see the [`Prepare and Quantize`](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#prepare-and-quantize) section in llama.cpp.
+
+ ## Usage
+
+ Load model info only:
+
+ ```js
+ import { loadLlamaModelInfo } from 'llama.rn'
+
+ const modelPath = 'file://<path to gguf model>'
+ console.log('Model Info:', await loadLlamaModelInfo(modelPath))
+ ```
+
+ Initialize a Llama context & do completion:
+
+ ```js
+ import { initLlama } from 'llama.rn'
+
+ // Initialize a Llama context with the model (may take a while)
+ const context = await initLlama({
+   model: modelPath,
+   use_mlock: true,
+   n_ctx: 2048,
+   n_gpu_layers: 99, // number of layers to store in VRAM (Currently only for iOS)
+   // embedding: true, // use embedding
+ })
+
+ const stopWords = ['</s>', '<|end|>', '<|eot_id|>', '<|end_of_text|>', '<|im_end|>', '<|EOT|>', '<|END_OF_TURN_TOKEN|>', '<|end_of_turn|>', '<|endoftext|>']
+
+ // Do chat completion
+ const msgResult = await context.completion(
+   {
+     messages: [
+       {
+         role: 'system',
+         content: 'This is a conversation between user and assistant, a friendly chatbot.',
+       },
+       {
+         role: 'user',
+         content: 'Hello!',
+       },
+     ],
+     n_predict: 100,
+     stop: stopWords,
+     // ...other params
+   },
+   (data) => {
+     // This is a partial completion callback
+     const { token } = data
+   },
+ )
+ console.log('Result:', msgResult.text)
+ console.log('Timings:', msgResult.timings)
+
+ // Or do text completion
+ const textResult = await context.completion(
+   {
+     prompt:
+       'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\nUser: Hello!\nLlama:',
+     n_predict: 100,
+     stop: [...stopWords, 'Llama:', 'User:'],
+     // ...other params
+   },
+   (data) => {
+     // This is a partial completion callback
+     const { token } = data
+   },
+ )
+ console.log('Result:', textResult.text)
+ console.log('Timings:', textResult.timings)
+ ```
+
+ The binding’s design is inspired by the [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp (a round-trip sketch follows the list):
+
+ - `/completion` and `/chat/completions`: `context.completion(params, partialCompletionCallback)`
+ - `/tokenize`: `context.tokenize(content)`
+ - `/detokenize`: `context.detokenize(tokens)`
+ - `/embedding`: `context.embedding(content)`
+ - ... Other methods
+
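As a quick sanity check of that mapping, a tokenize/detokenize round trip reusing the `context` from the usage example; the `{ tokens }` result shape is assumed to follow server.cpp:

```js
const { tokens } = await context.tokenize('The quick brown fox')
console.log('Tokens:', tokens)

// Detokenizing the same tokens should reproduce the original text
const text = await context.detokenize(tokens)
console.log('Round trip:', text)
```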
+ Please visit the [Documentation](docs/API) for more details.
+
+ You can also visit the [example](example) to see how to use it.
+
+ ## Session (State)
+
+ The session file is a binary file that contains the state of the context; loading one can save prompt processing time.
+
+ ```js
+ const context = await initLlama({ ...params })
+
+ // After prompt processing or completion ...
+
+ // Save the session
+ await context.saveSession('<path to save session>')
+
+ // Load the session
+ await context.loadSession('<path to load session>')
+ ```
+
+ ## Embedding
+
+ The embedding API is used to get the embedding of a text.
+
+ ```js
+ const context = await initLlama({
+   ...params,
+   embedding: true,
+ })
+
+ const { embedding } = await context.embedding('Hello, world!')
+ ```
+
+ - You can use a model like [nomic-ai/nomic-embed-text-v1.5-GGUF](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF) for better embedding quality.
+ - You can use a database like [op-sqlite](https://github.com/OP-Engineering/op-sqlite) with sqlite-vec support to store and search embeddings (a plain-JS similarity check is sketched below).
+
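Before reaching for a vector database, plain cosine similarity is often enough to compare two embeddings; a minimal sketch reusing the `{ embedding }` result shape from the example above:

```js
// Cosine similarity between two embedding vectors
const dot = (a, b) => a.reduce((sum, x, i) => sum + x * b[i], 0)
const cosineSimilarity = (a, b) =>
  dot(a, b) / (Math.sqrt(dot(a, a)) * Math.sqrt(dot(b, b)))

const { embedding: a } = await context.embedding('The weather is nice today')
const { embedding: b } = await context.embedding('It is sunny outside')
console.log('Similarity:', cosineSimilarity(a, b)) // closer to 1 = more similar
```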
+ ## Tool Calling
+
+ `llama.rn` has universal tool call support by using [minja](https://github.com/google/minja) (as the Jinja template parser) and [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/master/common/chat.cpp) in llama.cpp.
+
+ Example:
+
+ ```js
+ import { initLlama } from 'llama.rn'
+
+ const context = await initLlama({
+   // ...params
+ })
+
+ const { text, tool_calls } = await context.completion({
+   // ...params
+   jinja: true, // Enable Jinja template parser
+   tool_choice: 'auto',
+   tools: [
+     {
+       type: 'function',
+       function: {
+         name: 'ipython',
+         description:
+           'Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.',
+         parameters: {
+           type: 'object',
+           properties: {
+             code: {
+               type: 'string',
+               description: 'The code to run in the ipython interpreter.',
+             },
+           },
+           required: ['code'],
+         },
+       },
+     },
+   ],
+   messages: [
+     {
+       role: 'system',
+       content: 'You are a helpful assistant that can answer questions and help with tasks.',
+     },
+     {
+       role: 'user',
+       content: 'Test',
+     },
+   ],
+ })
+ console.log('Result:', text)
+ // If tool_calls is not empty, it means the model has called the tool
+ if (tool_calls) console.log('Tool Calls:', tool_calls)
+ ```
+
+ You can check [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/6eecde3cc8fda44da7794042e3668de4af3c32c6/common/chat.cpp#L7-L23) for the models that have native tool calling support; for other models it will fall back to the `GENERIC` tool call type.
+
+ The generic tool call always outputs a JSON object; the output will look like `{"response": "..."}` when the model decides not to use a tool call.
+
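A minimal sketch of consuming the `tool_calls` returned above, assuming the OpenAI-style shape produced by chat.cpp (`function.name` plus `function.arguments` as a JSON string):

```js
for (const call of tool_calls ?? []) {
  const { name, arguments: args } = call.function
  const parsed = JSON.parse(args) // arguments arrive as a JSON string
  console.log(`Model requested ${name} with`, parsed)
  // Run the tool yourself, then include its output in the next
  // completion request so the model can continue the conversation.
}
```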
+ ## Grammar Sampling
+
+ GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis.
+
+ See the [GBNF Guide](https://github.com/ggerganov/llama.cpp/tree/master/grammars) for more details.
+
+ `llama.rn` provides a built-in function to convert a JSON schema to GBNF:
+
+ Example GBNF grammar:
+ ```bnf
+ root ::= object
+ value ::= object | array | string | number | ("true" | "false" | "null") ws
+
+ object ::=
+   "{" ws (
+     string ":" ws value
+     ("," ws string ":" ws value)*
+   )? "}" ws
+
+ array ::=
+   "[" ws (
+     value
+     ("," ws value)*
+   )? "]" ws
+
+ string ::=
+   "\"" (
+     [^"\\\x7F\x00-\x1F] |
+     "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
+   )* "\"" ws
+
+ number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws
+
+ # Optional space: by convention, applied in this grammar after literal chars when allowed
+ ws ::= | " " | "\n" [ \t]{0,20}
+ ```
+
+ ```js
+ import { initLlama } from 'llama.rn'
+
+ const gbnf = '...'
+
+ const context = await initLlama({
+   // ...params
+   grammar: gbnf,
+ })
+
+ const { text } = await context.completion({
+   // ...params
+   messages: [
+     {
+       role: 'system',
+       content: 'You are a helpful assistant that can answer questions and help with tasks.',
+     },
+     {
+       role: 'user',
+       content: 'Test',
+     },
+   ],
+ })
+ console.log('Result:', text)
+ ```
+
+ This is also how `json_schema` in `response_format` works during completion: the JSON schema is converted to a GBNF grammar.
+
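A sketch of that path; the `response_format` field layout is assumed to mirror the OpenAI parameter of the same name:

```js
const { text } = await context.completion({
  messages: [{ role: 'user', content: 'Name a color.' }],
  response_format: {
    type: 'json_schema',
    json_schema: {
      schema: {
        type: 'object',
        properties: { color: { type: 'string' } },
        required: ['color'],
      },
    },
  },
})
console.log(JSON.parse(text)) // constrained to e.g. { "color": "..." }
```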
+ ## Mock `llama.rn`
+
+ We provide a mock version of `llama.rn` for testing purposes that you can use with Jest:
+
+ ```js
+ jest.mock('llama.rn', () => require('llama.rn/jest/mock'))
+ ```
+
+ ## NOTE
+
+ iOS:
+
+ - Enabling the [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) and [Increased Memory Limit](https://developer.apple.com/documentation/bundleresources/entitlements/com.apple.developer.kernel.increased-memory-limit?language=objc) capabilities is recommended for your iOS project.
+ - Metal:
+   - In our tests, some devices were unable to use Metal (GPU) because llama.cpp uses SIMD-scoped operations. You can check whether your device is supported in the [Metal feature set tables](https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf); an Apple7 GPU is the minimum requirement.
+   - Metal is also not supported in the iOS simulator due to [this limitation](https://developer.apple.com/documentation/metal/developing_metal_apps_that_run_in_simulator#3241609), as we use more than 14 constant buffers.
+
+ Android:
+
+ - Currently only the arm64-v8a / x86_64 platforms are supported; you can't initialize a context on other platforms. 64-bit platforms are recommended because they can allocate more memory for the model.
+ - No GPU backend has been integrated yet.
+
+ ## Contributing
+
+ See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the repository and the development workflow.
+
+ ## Apps using `llama.rn`
+
+ - [BRICKS](https://bricks.tools): Our product for building interactive signage in a simple way. We provide LLM functions as Generator LLM/Assistant.
+ - [ChatterUI](https://github.com/Vali-98/ChatterUI): A simple frontend for LLMs built in React Native.
+ - [PocketPal AI](https://github.com/a-ghorbani/pocketpal-ai): An app that brings language models directly to your phone.
+
+ ## Node.js binding
+
+ - [llama.node](https://github.com/mybigday/llama.node): Another Node.js binding of `llama.cpp` with the same API as `llama.rn`.
+
+ ## License
+
+ MIT
+
+ ---
+
+ Made with [create-react-native-library](https://github.com/callstack/react-native-builder-bob)
+
+ ---
+
+ <p align="center">
+   <a href="https://bricks.tools">
+     <img width="90px" src="https://avatars.githubusercontent.com/u/17320237?s=200&v=4">
+   </a>
+   <p align="center">
+     Built and maintained by <a href="https://bricks.tools">BRICKS</a>.
+   </p>
+ </p>