cui-llama.rn 1.4.6 → 1.6.0

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (366)
  1. package/LICENSE +20 -20
  2. package/README.md +317 -319
  3. package/android/build.gradle +116 -116
  4. package/android/gradle.properties +5 -5
  5. package/android/src/main/AndroidManifest.xml +4 -4
  6. package/android/src/main/CMakeLists.txt +124 -117
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
  8. package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
  10. package/android/src/main/jni-utils.h +100 -100
  11. package/android/src/main/jni.cpp +1263 -1245
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  20. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
  21. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
  22. package/cpp/README.md +4 -4
  23. package/cpp/binary-ops.cpp +158 -0
  24. package/cpp/binary-ops.h +16 -0
  25. package/cpp/chat.cpp +1769 -1779
  26. package/cpp/chat.h +9 -1
  27. package/cpp/common.cpp +20 -522
  28. package/cpp/common.h +13 -36
  29. package/cpp/cpu-common.h +72 -0
  30. package/cpp/ggml-common.h +12 -6
  31. package/cpp/ggml-cpu-aarch64.cpp +1557 -80
  32. package/cpp/ggml-cpu-impl.h +2 -21
  33. package/cpp/ggml-cpu-quants.c +904 -405
  34. package/cpp/ggml-cpu.c +909 -13237
  35. package/cpp/ggml-impl.h +50 -23
  36. package/cpp/ggml-llama-sim.metallib +0 -0
  37. package/cpp/ggml-llama.metallib +0 -0
  38. package/cpp/ggml-metal-impl.h +597 -523
  39. package/cpp/ggml-metal.m +798 -580
  40. package/cpp/ggml.c +92 -3
  41. package/cpp/ggml.h +30 -6
  42. package/cpp/gguf.cpp +1 -0
  43. package/cpp/llama-adapter.cpp +55 -20
  44. package/cpp/llama-adapter.h +11 -9
  45. package/cpp/llama-arch.cpp +217 -16
  46. package/cpp/llama-arch.h +25 -0
  47. package/cpp/llama-batch.h +2 -2
  48. package/cpp/llama-chat.cpp +54 -2
  49. package/cpp/llama-chat.h +3 -0
  50. package/cpp/llama-context.cpp +2294 -1238
  51. package/cpp/llama-context.h +214 -77
  52. package/cpp/llama-cparams.h +1 -0
  53. package/cpp/llama-graph.cpp +1695 -0
  54. package/cpp/llama-graph.h +592 -0
  55. package/cpp/llama-hparams.cpp +8 -0
  56. package/cpp/llama-hparams.h +17 -0
  57. package/cpp/llama-io.cpp +15 -0
  58. package/cpp/llama-io.h +35 -0
  59. package/cpp/llama-kv-cache.cpp +965 -303
  60. package/cpp/llama-kv-cache.h +145 -151
  61. package/cpp/llama-memory.cpp +1 -0
  62. package/cpp/llama-memory.h +21 -0
  63. package/cpp/llama-mmap.cpp +1 -1
  64. package/cpp/llama-model-loader.cpp +10 -5
  65. package/cpp/llama-model-loader.h +5 -3
  66. package/cpp/llama-model.cpp +9194 -201
  67. package/cpp/llama-model.h +40 -1
  68. package/cpp/llama-sampling.cpp +5 -0
  69. package/cpp/llama-vocab.cpp +36 -5
  70. package/cpp/llama.cpp +51 -9984
  71. package/cpp/llama.h +102 -22
  72. package/cpp/log.cpp +34 -0
  73. package/cpp/minja/chat-template.hpp +15 -7
  74. package/cpp/minja/minja.hpp +120 -94
  75. package/cpp/ops.cpp +8723 -0
  76. package/cpp/ops.h +128 -0
  77. package/cpp/rn-llama.cpp +873 -882
  78. package/cpp/rn-llama.h +138 -148
  79. package/cpp/sampling.cpp +3 -0
  80. package/cpp/sampling.h +107 -107
  81. package/cpp/sgemm.cpp +533 -88
  82. package/cpp/simd-mappings.h +888 -0
  83. package/cpp/speculative.cpp +4 -4
  84. package/cpp/unary-ops.cpp +186 -0
  85. package/cpp/unary-ops.h +28 -0
  86. package/cpp/unicode-data.cpp +7034 -7034
  87. package/cpp/unicode-data.h +20 -20
  88. package/cpp/unicode.cpp +849 -849
  89. package/cpp/unicode.h +66 -66
  90. package/cpp/vec.cpp +258 -0
  91. package/cpp/vec.h +802 -0
  92. package/ios/CMakeLists.txt +116 -105
  93. package/ios/RNLlama.h +7 -7
  94. package/ios/RNLlama.mm +418 -405
  95. package/ios/RNLlamaContext.h +57 -57
  96. package/ios/RNLlamaContext.mm +835 -819
  97. package/ios/rnllama.xcframework/Info.plist +74 -74
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
  101. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  102. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
  143. package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/chat-template.hpp +15 -7
  144. package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/minja.hpp +120 -94
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  184. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  191. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  192. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  193. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  194. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  195. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  196. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  197. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  198. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  199. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  200. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  201. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  202. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  203. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  204. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  205. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  206. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  207. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  208. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  209. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  210. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  211. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  212. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  213. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  214. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  215. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  216. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  217. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  218. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  225. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  226. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  227. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  228. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  229. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  230. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  231. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  232. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  233. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  234. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  235. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  236. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  237. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  238. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  239. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  240. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
  241. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  242. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  243. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  244. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  245. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  246. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  247. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  248. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  249. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  250. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  251. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  261. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  262. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
  263. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
  264. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  265. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  266. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
  267. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  268. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
  269. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  270. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
  272. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  273. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  274. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
  275. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
  276. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  277. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  278. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  279. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  280. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  281. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  282. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  283. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  284. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  285. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  286. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  287. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  288. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  289. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  290. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  291. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  292. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  293. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  294. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  295. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  296. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  297. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  298. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  299. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  300. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  301. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  302. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  303. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  304. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  305. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  306. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  307. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  308. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  309. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  310. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  311. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  312. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  313. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  314. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  315. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  316. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  317. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  318. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  319. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  320. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  321. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  322. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  323. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  324. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  325. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  326. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  327. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  328. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  329. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  330. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  331. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  332. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  333. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  334. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  335. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  336. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  337. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  338. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  339. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  340. package/jest/mock.js +203 -203
  341. package/lib/commonjs/NativeRNLlama.js +1 -2
  342. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  343. package/lib/commonjs/chat.js.map +1 -1
  344. package/lib/commonjs/grammar.js +12 -31
  345. package/lib/commonjs/grammar.js.map +1 -1
  346. package/lib/commonjs/index.js +47 -47
  347. package/lib/commonjs/index.js.map +1 -1
  348. package/lib/commonjs/package.json +1 -0
  349. package/lib/module/NativeRNLlama.js +2 -0
  350. package/lib/module/NativeRNLlama.js.map +1 -1
  351. package/lib/module/chat.js +2 -0
  352. package/lib/module/chat.js.map +1 -1
  353. package/lib/module/grammar.js +14 -31
  354. package/lib/module/grammar.js.map +1 -1
  355. package/lib/module/index.js +47 -45
  356. package/lib/module/index.js.map +1 -1
  357. package/lib/module/package.json +1 -0
  358. package/lib/typescript/NativeRNLlama.d.ts +6 -4
  359. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  360. package/lib/typescript/index.d.ts.map +1 -1
  361. package/llama-rn.podspec +48 -48
  362. package/package.json +233 -233
  363. package/src/NativeRNLlama.ts +426 -424
  364. package/src/chat.ts +44 -44
  365. package/src/grammar.ts +854 -854
  366. package/src/index.ts +495 -485
package/src/index.ts CHANGED
@@ -1,485 +1,495 @@
- import { NativeEventEmitter, DeviceEventEmitter, Platform } from 'react-native'
- import type { DeviceEventEmitterStatic } from 'react-native'
- import RNLlama from './NativeRNLlama'
- import type {
-   NativeContextParams,
-   NativeLlamaContext,
-   NativeCompletionParams,
-   NativeCompletionTokenProb,
-   NativeCompletionResult,
-   NativeTokenizeResult,
-   NativeEmbeddingResult,
-   NativeSessionLoadResult,
-   NativeCPUFeatures,
-   NativeEmbeddingParams,
-   NativeCompletionTokenProbItem,
-   NativeCompletionResultTimings,
-   JinjaFormattedChatResult,
- } from './NativeRNLlama'
- import type {
-   SchemaGrammarConverterPropOrder,
-   SchemaGrammarConverterBuiltinRule,
- } from './grammar'
- import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
- import type { RNLlamaMessagePart, RNLlamaOAICompatibleMessage } from './chat'
- import { formatChat } from './chat'
-
- export type {
-   NativeContextParams,
-   NativeLlamaContext,
-   NativeCompletionParams,
-   NativeCompletionTokenProb,
-   NativeCompletionResult,
-   NativeTokenizeResult,
-   NativeEmbeddingResult,
-   NativeSessionLoadResult,
-   NativeEmbeddingParams,
-   NativeCompletionTokenProbItem,
-   NativeCompletionResultTimings,
-   RNLlamaMessagePart,
-   RNLlamaOAICompatibleMessage,
-   JinjaFormattedChatResult,
-
-   // Deprecated
-   SchemaGrammarConverterPropOrder,
-   SchemaGrammarConverterBuiltinRule,
- }
-
- export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
-
- const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress'
- const EVENT_ON_TOKEN = '@RNLlama_onToken'
- const EVENT_ON_NATIVE_LOG = '@RNLlama_onNativeLog'
-
- let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
- if (Platform.OS === 'ios') {
-   // @ts-ignore
-   EventEmitter = new NativeEventEmitter(RNLlama)
- }
- if (Platform.OS === 'android') {
-   EventEmitter = DeviceEventEmitter
- }
-
- const logListeners: Array<(level: string, text: string) => void> = []
-
- // @ts-ignore
- if (EventEmitter) {
-   EventEmitter.addListener(
-     EVENT_ON_NATIVE_LOG,
-     (evt: { level: string; text: string }) => {
-       logListeners.forEach((listener) => listener(evt.level, evt.text))
-     },
-   )
-   RNLlama?.toggleNativeLog?.(false) // Trigger unset to use default log callback
- }
-
- export type TokenData = {
-   token: string
-   completion_probabilities?: Array<NativeCompletionTokenProb>
- }
-
- type TokenNativeEvent = {
-   contextId: number
-   tokenResult: TokenData
- }
-
- export enum CACHE_TYPE {
-   F16 = 'f16',
-   F32 = 'f32',
-   Q8_0 = 'q8_0',
-   Q4_0 = 'q4_0',
-   Q4_1 = 'q4_1',
-   IQ4_NL = 'iq4_nl',
-   Q5_0 = 'q5_0',
-   Q5_1 = 'q5_1'
- }
-
-
- export type ContextParams = Omit<
-   NativeContextParams,
-   'cache_type_k' | 'cache_type_v' | 'pooling_type'
- > & {
-   cache_type_k?: CACHE_TYPE
-   cache_type_v?: CACHE_TYPE
-   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
- }
-
- export type EmbeddingParams = NativeEmbeddingParams
-
- export type CompletionResponseFormat = {
-   type: 'text' | 'json_object' | 'json_schema'
-   json_schema?: {
-     strict?: boolean
-     schema: object
-   }
-   schema?: object // for json_object type
- }
-
- export type CompletionBaseParams = {
-   prompt?: string
-   messages?: RNLlamaOAICompatibleMessage[]
-   chatTemplate?: string // deprecated
-   chat_template?: string
-   jinja?: boolean
-   tools?: object
-   parallel_tool_calls?: object
-   tool_choice?: string
-   response_format?: CompletionResponseFormat
- }
- export type CompletionParams = Omit<
-   NativeCompletionParams,
-   'emit_partial_completion' | 'prompt'
- > &
-   CompletionBaseParams
-
- export type BenchResult = {
-   modelDesc: string
-   modelSize: number
-   modelNParams: number
-   ppAvg: number
-   ppStd: number
-   tgAvg: number
-   tgStd: number
- }
-
- const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
-   if (responseFormat?.type === 'json_schema') {
-     return responseFormat.json_schema?.schema
-   }
-   if (responseFormat?.type === 'json_object') {
-     return responseFormat.schema || {}
-   }
-   return null
- }
-
- export class LlamaContext {
-   id: number
-
-   gpu: boolean = false
-
-   reasonNoGPU: string = ''
-
-   model: NativeLlamaContext['model']
-
-   constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext) {
-     this.id = contextId
-     this.gpu = gpu
-     this.reasonNoGPU = reasonNoGPU
-     this.model = model
-   }
-
-   /**
-    * Load cached prompt & completion state from a file.
-    */
-   async loadSession(filepath: string): Promise<NativeSessionLoadResult> {
-     let path = filepath
-     if (path.startsWith('file://')) path = path.slice(7)
-     return RNLlama.loadSession(this.id, path)
-   }
-
-   /**
-    * Save current cached prompt & completion state to a file.
-    */
-   async saveSession(
-     filepath: string,
-     options?: { tokenSize: number },
-   ): Promise<number> {
-     return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1)
-   }
-
-   isLlamaChatSupported(): boolean {
-     return !!this.model.chatTemplates.llamaChat
-   }
-
-   isJinjaSupported(): boolean {
-     const { minja } = this.model.chatTemplates
-     return !!minja?.toolUse || !!minja?.default
-   }
-
-   async getFormattedChat(
-     messages: RNLlamaOAICompatibleMessage[],
-     template?: string | null,
-     params?: {
-       jinja?: boolean
-       response_format?: CompletionResponseFormat
-       tools?: object
-       parallel_tool_calls?: object
-       tool_choice?: string
-     },
-   ): Promise<JinjaFormattedChatResult | string> {
-     const chat = formatChat(messages)
-     const useJinja = this.isJinjaSupported() && params?.jinja
-     let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
-     if (template) tmpl = template // Force replace if provided
-     const jsonSchema = getJsonSchema(params?.response_format)
-     return RNLlama.getFormattedChat(this.id, JSON.stringify(chat), tmpl, {
-       jinja: useJinja,
-       json_schema: jsonSchema ? JSON.stringify(jsonSchema) : undefined,
-       tools: params?.tools ? JSON.stringify(params.tools) : undefined,
-       parallel_tool_calls: params?.parallel_tool_calls
-         ? JSON.stringify(params.parallel_tool_calls)
-         : undefined,
-       tool_choice: params?.tool_choice,
-     })
-   }
-
-   async completion(
-     params: CompletionParams,
-     callback?: (data: TokenData) => void,
-   ): Promise<NativeCompletionResult> {
-     const nativeParams = {
-       ...params,
-       prompt: params.prompt || '',
-       emit_partial_completion: !!callback,
-     }
-     if (params.messages) {
-       // messages always win
-       const formattedResult = await this.getFormattedChat(
-         params.messages,
-         params.chat_template || params.chatTemplate,
-         {
-           jinja: params.jinja,
-           tools: params.tools,
-           parallel_tool_calls: params.parallel_tool_calls,
-           tool_choice: params.tool_choice,
-         },
-       )
-       if (typeof formattedResult === 'string') {
-         nativeParams.prompt = formattedResult || ''
-       } else {
-         nativeParams.prompt = formattedResult.prompt || ''
-         if (typeof formattedResult.chat_format === 'number')
-           nativeParams.chat_format = formattedResult.chat_format
-         if (formattedResult.grammar)
-           nativeParams.grammar = formattedResult.grammar
-         if (typeof formattedResult.grammar_lazy === 'boolean')
-           nativeParams.grammar_lazy = formattedResult.grammar_lazy
-         if (formattedResult.grammar_triggers)
-           nativeParams.grammar_triggers = formattedResult.grammar_triggers
-         if (formattedResult.preserved_tokens)
-           nativeParams.preserved_tokens = formattedResult.preserved_tokens
-         if (formattedResult.additional_stops) {
-           if (!nativeParams.stop) nativeParams.stop = []
-           nativeParams.stop.push(...formattedResult.additional_stops)
-         }
-       }
-     } else {
-       nativeParams.prompt = params.prompt || ''
-     }
-
-     if (nativeParams.response_format && !nativeParams.grammar) {
-       const jsonSchema = getJsonSchema(params.response_format)
-       if (jsonSchema) nativeParams.json_schema = JSON.stringify(jsonSchema)
-     }
-
-     let tokenListener: any =
-       callback &&
-       EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => {
-         const { contextId, tokenResult } = evt
-         if (contextId !== this.id) return
-         callback(tokenResult)
-       })
-
-     if (!nativeParams.prompt) throw new Error('Prompt is required')
-
-     const promise = RNLlama.completion(this.id, nativeParams)
-     return promise
-       .then((completionResult) => {
-         tokenListener?.remove()
-         tokenListener = null
-         return completionResult
-       })
-       .catch((err: any) => {
-         tokenListener?.remove()
-         tokenListener = null
-         throw err
-       })
-   }
-
-   stopCompletion(): Promise<void> {
-     return RNLlama.stopCompletion(this.id)
-   }
-
-   tokenizeAsync(text: string): Promise<NativeTokenizeResult> {
-     return RNLlama.tokenizeAsync(this.id, text)
-   }
-
-   tokenizeSync(text: string): NativeTokenizeResult {
-     return RNLlama.tokenizeSync(this.id, text)
-   }
-
-   detokenize(tokens: number[]): Promise<string> {
-     return RNLlama.detokenize(this.id, tokens)
-   }
-
-   embedding(
-     text: string,
-     params?: EmbeddingParams,
-   ): Promise<NativeEmbeddingResult> {
-     return RNLlama.embedding(this.id, text, params || {})
-   }
-
-   async bench(
-     pp: number,
-     tg: number,
-     pl: number,
-     nr: number,
-   ): Promise<BenchResult> {
-     const result = await RNLlama.bench(this.id, pp, tg, pl, nr)
-     const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] =
-       JSON.parse(result)
-     return {
-       modelDesc,
-       modelSize,
-       modelNParams,
-       ppAvg,
-       ppStd,
-       tgAvg,
-       tgStd,
-     }
-   }
-
-   async applyLoraAdapters(
-     loraList: Array<{ path: string; scaled?: number }>,
-   ): Promise<void> {
-     let loraAdapters: Array<{ path: string; scaled?: number }> = []
-     if (loraList)
-       loraAdapters = loraList.map((l) => ({
-         path: l.path.replace(/file:\/\//, ''),
-         scaled: l.scaled,
-       }))
-     return RNLlama.applyLoraAdapters(this.id, loraAdapters)
-   }
-
-   async removeLoraAdapters(): Promise<void> {
-     return RNLlama.removeLoraAdapters(this.id)
-   }
-
-   async getLoadedLoraAdapters(): Promise<
-     Array<{ path: string; scaled?: number }>
-   > {
-     return RNLlama.getLoadedLoraAdapters(this.id)
-   }
-
-   async release(): Promise<void> {
-     return RNLlama.releaseContext(this.id)
-   }
- }
-
- export async function getCpuFeatures() : Promise<NativeCPUFeatures> {
-   return RNLlama.getCpuFeatures()
- }
-
- export async function toggleNativeLog(enabled: boolean): Promise<void> {
-   return RNLlama.toggleNativeLog(enabled)
- }
-
- export function addNativeLogListener(
-   listener: (level: string, text: string) => void,
- ): { remove: () => void } {
-   logListeners.push(listener)
-   return {
-     remove: () => {
-       logListeners.splice(logListeners.indexOf(listener), 1)
-     },
-   }
- }
-
- export async function setContextLimit(limit: number): Promise<void> {
-   return RNLlama.setContextLimit(limit)
- }
-
- let contextIdCounter = 0
- const contextIdRandom = () =>
-   process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000)
-
- const modelInfoSkip = [
-   // Large fields
-   'tokenizer.ggml.tokens',
-   'tokenizer.ggml.token_type',
-   'tokenizer.ggml.merges',
- ]
- export async function loadLlamaModelInfo(model: string): Promise<Object> {
-   let path = model
-   if (path.startsWith('file://')) path = path.slice(7)
-   return RNLlama.modelInfo(path, modelInfoSkip)
- }
-
- const poolTypeMap = {
-   // -1 is unspecified as undefined
-   none: 0,
-   mean: 1,
-   cls: 2,
-   last: 3,
-   rank: 4,
- }
-
- export async function initLlama(
-   {
-     model,
-     is_model_asset: isModelAsset,
-     pooling_type: poolingType,
-     lora,
-     lora_list: loraList,
-     ...rest
-   }: ContextParams,
-   onProgress?: (progress: number) => void,
- ): Promise<LlamaContext> {
-   let path = model
-   if (path.startsWith('file://')) path = path.slice(7)
-
-   let loraPath = lora
-   if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7)
-
-   let loraAdapters: Array<{ path: string; scaled?: number }> = []
-   if (loraList)
-     loraAdapters = loraList.map((l) => ({
-       path: l.path.replace(/file:\/\//, ''),
-       scaled: l.scaled,
-     }))
-
-   const contextId = contextIdCounter + contextIdRandom()
-   contextIdCounter += 1
-
-   let removeProgressListener: any = null
-   if (onProgress) {
-     removeProgressListener = EventEmitter.addListener(
-       EVENT_ON_INIT_CONTEXT_PROGRESS,
-       (evt: { contextId: number; progress: number }) => {
-         if (evt.contextId !== contextId) return
-         onProgress(evt.progress)
-       },
-     )
-   }
-
-   const poolType = poolTypeMap[poolingType as keyof typeof poolTypeMap]
-   const {
-     gpu,
-     reasonNoGPU,
-     model: modelDetails,
-     androidLib,
-   } = await RNLlama.initContext(contextId, {
-     model: path,
-     is_model_asset: !!isModelAsset,
-     use_progress_callback: !!onProgress,
-     pooling_type: poolType,
-     lora: loraPath,
-     lora_list: loraAdapters,
-     ...rest,
-   }).catch((err: any) => {
-     removeProgressListener?.remove()
-     throw err
-   })
-   removeProgressListener?.remove()
-   return new LlamaContext({
-     contextId,
-     gpu,
-     reasonNoGPU,
-     model: modelDetails,
-     androidLib,
-   })
- }
-
- export async function releaseAllLlama(): Promise<void> {
-   return RNLlama.releaseAllContexts()
- }
+ import { NativeEventEmitter, DeviceEventEmitter, Platform } from 'react-native'
+ import type { DeviceEventEmitterStatic } from 'react-native'
+ import RNLlama from './NativeRNLlama'
+ import type {
+   NativeContextParams,
+   NativeLlamaContext,
+   NativeCompletionParams,
+   NativeCompletionTokenProb,
+   NativeCompletionResult,
+   NativeTokenizeResult,
+   NativeEmbeddingResult,
+   NativeSessionLoadResult,
+   NativeCPUFeatures,
+   NativeEmbeddingParams,
+   NativeCompletionTokenProbItem,
+   NativeCompletionResultTimings,
+   JinjaFormattedChatResult,
+ } from './NativeRNLlama'
+ import type {
+   SchemaGrammarConverterPropOrder,
+   SchemaGrammarConverterBuiltinRule,
+ } from './grammar'
+ import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
+ import type { RNLlamaMessagePart, RNLlamaOAICompatibleMessage } from './chat'
+ import { formatChat } from './chat'
+
+ export type {
+   NativeContextParams,
+   NativeLlamaContext,
+   NativeCompletionParams,
+   NativeCompletionTokenProb,
+   NativeCompletionResult,
+   NativeTokenizeResult,
+   NativeEmbeddingResult,
+   NativeSessionLoadResult,
+   NativeEmbeddingParams,
+   NativeCompletionTokenProbItem,
+   NativeCompletionResultTimings,
+   RNLlamaMessagePart,
+   RNLlamaOAICompatibleMessage,
+   JinjaFormattedChatResult,
+
+   // Deprecated
+   SchemaGrammarConverterPropOrder,
+   SchemaGrammarConverterBuiltinRule,
+ }
+
+ export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
+
+ const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress'
+ const EVENT_ON_TOKEN = '@RNLlama_onToken'
+ const EVENT_ON_NATIVE_LOG = '@RNLlama_onNativeLog'
+
+ let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
+ if (Platform.OS === 'ios') {
+   // @ts-ignore
+   EventEmitter = new NativeEventEmitter(RNLlama)
+ }
+ if (Platform.OS === 'android') {
+   EventEmitter = DeviceEventEmitter
+ }
+
+ const logListeners: Array<(level: string, text: string) => void> = []
+
+ // @ts-ignore
+ if (EventEmitter) {
+   EventEmitter.addListener(
+     EVENT_ON_NATIVE_LOG,
+     (evt: { level: string; text: string }) => {
+       logListeners.forEach((listener) => listener(evt.level, evt.text))
+     },
+   )
+   // Trigger unset to use default log callback
+   RNLlama?.toggleNativeLog?.(false)?.catch?.(() => {})
+ }
+
+ export type TokenData = {
+   token: string
+   completion_probabilities?: Array<NativeCompletionTokenProb>
+ }
+
+ type TokenNativeEvent = {
+   contextId: number
+   tokenResult: TokenData
+ }
+
+ export enum CACHE_TYPE {
+   F16 = 'f16',
+   F32 = 'f32',
+   Q8_0 = 'q8_0',
+   Q4_0 = 'q4_0',
+   Q4_1 = 'q4_1',
+   IQ4_NL = 'iq4_nl',
+   Q5_0 = 'q5_0',
+   Q5_1 = 'q5_1'
+ }
+
+
+ export type ContextParams = Omit<
+   NativeContextParams,
+   'cache_type_k' | 'cache_type_v' | 'pooling_type'
+ > & {
+   cache_type_k?: CACHE_TYPE
+   cache_type_v?: CACHE_TYPE
+   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
+ }
+
+ export type EmbeddingParams = NativeEmbeddingParams
+
+ export type CompletionResponseFormat = {
+   type: 'text' | 'json_object' | 'json_schema'
+   json_schema?: {
+     strict?: boolean
+     schema: object
+   }
+   schema?: object // for json_object type
+ }
+
+ export type CompletionBaseParams = {
+   prompt?: string
+   messages?: RNLlamaOAICompatibleMessage[]
+   chatTemplate?: string // deprecated
+   chat_template?: string
+   jinja?: boolean
+   tools?: object
+   parallel_tool_calls?: object
+   tool_choice?: string
+   response_format?: CompletionResponseFormat
+ }
+ export type CompletionParams = Omit<
+   NativeCompletionParams,
+   'emit_partial_completion' | 'prompt'
+ > &
+   CompletionBaseParams
+
+ export type BenchResult = {
+   modelDesc: string
+   modelSize: number
+   modelNParams: number
+   ppAvg: number
+   ppStd: number
+   tgAvg: number
+   tgStd: number
+ }
+
+ const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
+   if (responseFormat?.type === 'json_schema') {
+     return responseFormat.json_schema?.schema
+   }
+   if (responseFormat?.type === 'json_object') {
+     return responseFormat.schema || {}
+   }
+   return null
+ }
+
+ export class LlamaContext {
+   id: number
+
+   gpu: boolean = false
+
+   reasonNoGPU: string = ''
+
+   model: NativeLlamaContext['model']
+
+   constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext) {
+     this.id = contextId
+     this.gpu = gpu
+     this.reasonNoGPU = reasonNoGPU
+     this.model = model
+   }
+
+   /**
+    * Load cached prompt & completion state from a file.
+    */
+   async loadSession(filepath: string): Promise<NativeSessionLoadResult> {
+     let path = filepath
+     if (path.startsWith('file://')) path = path.slice(7)
+     return RNLlama.loadSession(this.id, path)
+   }
+
+   /**
+    * Save current cached prompt & completion state to a file.
+    */
+   async saveSession(
+     filepath: string,
+     options?: { tokenSize: number },
+   ): Promise<number> {
+     return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1)
+   }
+
+   isLlamaChatSupported(): boolean {
+     return !!this.model.chatTemplates.llamaChat
+   }
+
+   isJinjaSupported(): boolean {
+     const { minja } = this.model.chatTemplates
+     return !!minja?.toolUse || !!minja?.default
+   }
+
+   async getFormattedChat(
+     messages: RNLlamaOAICompatibleMessage[],
+     template?: string | null,
+     params?: {
+       jinja?: boolean
+       response_format?: CompletionResponseFormat
+       tools?: object
+       parallel_tool_calls?: object
+       tool_choice?: string
+     },
+   ): Promise<JinjaFormattedChatResult | string> {
+     const chat = formatChat(messages)
+     const useJinja = this.isJinjaSupported() && params?.jinja
+     let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
+     if (template) tmpl = template // Force replace if provided
+     const jsonSchema = getJsonSchema(params?.response_format)
+     return RNLlama.getFormattedChat(this.id, JSON.stringify(chat), tmpl, {
+       jinja: useJinja,
+       json_schema: jsonSchema ? JSON.stringify(jsonSchema) : undefined,
+       tools: params?.tools ? JSON.stringify(params.tools) : undefined,
+       parallel_tool_calls: params?.parallel_tool_calls
+         ? JSON.stringify(params.parallel_tool_calls)
+         : undefined,
+       tool_choice: params?.tool_choice,
+     })
+   }
+
+   async completion(
+     params: CompletionParams,
+     callback?: (data: TokenData) => void,
+   ): Promise<NativeCompletionResult> {
+     const nativeParams = {
+       ...params,
+       prompt: params.prompt || '',
+       emit_partial_completion: !!callback,
+     }
+     if (params.messages) {
+       // messages always win
+       const formattedResult = await this.getFormattedChat(
+         params.messages,
+         params.chat_template || params.chatTemplate,
+         {
+           jinja: params.jinja,
+           tools: params.tools,
+           parallel_tool_calls: params.parallel_tool_calls,
+           tool_choice: params.tool_choice,
+         },
+       )
+       if (typeof formattedResult === 'string') {
+         nativeParams.prompt = formattedResult || ''
+       } else {
+         nativeParams.prompt = formattedResult.prompt || ''
+         if (typeof formattedResult.chat_format === 'number')
+           nativeParams.chat_format = formattedResult.chat_format
+         if (formattedResult.grammar)
+           nativeParams.grammar = formattedResult.grammar
+         if (typeof formattedResult.grammar_lazy === 'boolean')
+           nativeParams.grammar_lazy = formattedResult.grammar_lazy
+         if (formattedResult.grammar_triggers)
+           nativeParams.grammar_triggers = formattedResult.grammar_triggers
+         if (formattedResult.preserved_tokens)
+           nativeParams.preserved_tokens = formattedResult.preserved_tokens
+         if (formattedResult.additional_stops) {
+           if (!nativeParams.stop) nativeParams.stop = []
+           nativeParams.stop.push(...formattedResult.additional_stops)
+         }
+       }
+     } else {
+       nativeParams.prompt = params.prompt || ''
+     }
+
+     if (nativeParams.response_format && !nativeParams.grammar) {
+       const jsonSchema = getJsonSchema(params.response_format)
+       if (jsonSchema) nativeParams.json_schema = JSON.stringify(jsonSchema)
+     }
+
+     let tokenListener: any =
+       callback &&
+       EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => {
+         const { contextId, tokenResult } = evt
+         if (contextId !== this.id) return
+         callback(tokenResult)
+       })
+
+     if (!nativeParams.prompt) throw new Error('Prompt is required')
+
+     const promise = RNLlama.completion(this.id, nativeParams)
+     return promise
+       .then((completionResult) => {
+         tokenListener?.remove()
+         tokenListener = null
+         return completionResult
+       })
+       .catch((err: any) => {
+         tokenListener?.remove()
+         tokenListener = null
+         throw err
+       })
+   }
+
+   stopCompletion(): Promise<void> {
+     return RNLlama.stopCompletion(this.id)
+   }
+
+   tokenizeAsync(text: string): Promise<NativeTokenizeResult> {
+     return RNLlama.tokenizeAsync(this.id, text)
+   }
+
+   tokenizeSync(text: string): NativeTokenizeResult {
+     return RNLlama.tokenizeSync(this.id, text)
+   }
+
+   detokenize(tokens: number[]): Promise<string> {
+     return RNLlama.detokenize(this.id, tokens)
+   }
+
+   embedding(
+     text: string,
+     params?: EmbeddingParams,
+   ): Promise<NativeEmbeddingResult> {
+     return RNLlama.embedding(this.id, text, params || {})
+   }
+
+   async bench(
+     pp: number,
+     tg: number,
+     pl: number,
+     nr: number,
+   ): Promise<BenchResult> {
+     const result = await RNLlama.bench(this.id, pp, tg, pl, nr)
+     const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] =
+       JSON.parse(result)
+     return {
+       modelDesc,
+       modelSize,
+       modelNParams,
+       ppAvg,
+       ppStd,
+       tgAvg,
+       tgStd,
+     }
+   }
+
+   async applyLoraAdapters(
+     loraList: Array<{ path: string; scaled?: number }>,
+   ): Promise<void> {
+     let loraAdapters: Array<{ path: string; scaled?: number }> = []
+     if (loraList)
+       loraAdapters = loraList.map((l) => ({
+         path: l.path.replace(/file:\/\//, ''),
+         scaled: l.scaled,
+       }))
+     return RNLlama.applyLoraAdapters(this.id, loraAdapters)
+   }
+
+   async removeLoraAdapters(): Promise<void> {
+     return RNLlama.removeLoraAdapters(this.id)
+   }
+
+   async getLoadedLoraAdapters(): Promise<
+     Array<{ path: string; scaled?: number }>
+   > {
+     return RNLlama.getLoadedLoraAdapters(this.id)
+   }
+
+   async release(): Promise<void> {
+     return RNLlama.releaseContext(this.id)
+   }
+ }
+
+ export async function getCpuFeatures() : Promise<NativeCPUFeatures> {
+   if(Platform.OS === 'android') {
+     return RNLlama.getCpuFeatures()
+   }
+   console.warn("getCpuFeatures() is an android only feature")
+   return {
+     i8mm: false,
+     armv8: false,
+     dotprod: false,
+   }
+ }
+
+ export async function toggleNativeLog(enabled: boolean): Promise<void> {
+   return RNLlama.toggleNativeLog(enabled)
+ }
+
+ export function addNativeLogListener(
+   listener: (level: string, text: string) => void,
+ ): { remove: () => void } {
+   logListeners.push(listener)
+   return {
+     remove: () => {
+       logListeners.splice(logListeners.indexOf(listener), 1)
+     },
+   }
+ }
+
+ export async function setContextLimit(limit: number): Promise<void> {
+   return RNLlama.setContextLimit(limit)
+ }
+
+ let contextIdCounter = 0
+ const contextIdRandom = () =>
+   process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000)
+
+ const modelInfoSkip = [
+   // Large fields
+   'tokenizer.ggml.tokens',
+   'tokenizer.ggml.token_type',
+   'tokenizer.ggml.merges',
+   'tokenizer.ggml.scores'
+ ]
+ export async function loadLlamaModelInfo(model: string): Promise<Object> {
+   let path = model
+   if (path.startsWith('file://')) path = path.slice(7)
+   return RNLlama.modelInfo(path, modelInfoSkip)
+ }
+
+ const poolTypeMap = {
+   // -1 is unspecified as undefined
+   none: 0,
+   mean: 1,
+   cls: 2,
+   last: 3,
+   rank: 4,
+ }
+
+ export async function initLlama(
+   {
+     model,
+     is_model_asset: isModelAsset,
+     pooling_type: poolingType,
+     lora,
+     lora_list: loraList,
+     ...rest
+   }: ContextParams,
+   onProgress?: (progress: number) => void,
+ ): Promise<LlamaContext> {
+   let path = model
+   if (path.startsWith('file://')) path = path.slice(7)
+
+   let loraPath = lora
+   if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7)
+
+   let loraAdapters: Array<{ path: string; scaled?: number }> = []
+   if (loraList)
+     loraAdapters = loraList.map((l) => ({
+       path: l.path.replace(/file:\/\//, ''),
+       scaled: l.scaled,
+     }))
+
+   const contextId = contextIdCounter + contextIdRandom()
+   contextIdCounter += 1
+
+   let removeProgressListener: any = null
+   if (onProgress) {
+     removeProgressListener = EventEmitter.addListener(
+       EVENT_ON_INIT_CONTEXT_PROGRESS,
+       (evt: { contextId: number; progress: number }) => {
+         if (evt.contextId !== contextId) return
+         onProgress(evt.progress)
+       },
+     )
+   }
+
+   const poolType = poolTypeMap[poolingType as keyof typeof poolTypeMap]
+   const {
+     gpu,
+     reasonNoGPU,
+     model: modelDetails,
+     androidLib,
+   } = await RNLlama.initContext(contextId, {
+     model: path,
+     is_model_asset: !!isModelAsset,
+     use_progress_callback: !!onProgress,
+     pooling_type: poolType,
+     lora: loraPath,
+     lora_list: loraAdapters,
+     ...rest,
+   }).catch((err: any) => {
+     removeProgressListener?.remove()
+     throw err
+   })
+   removeProgressListener?.remove()
+   return new LlamaContext({
+     contextId,
+     gpu,
+     reasonNoGPU,
+     model: modelDetails,
+     androidLib,
+   })
+ }
+
+ export async function releaseAllLlama(): Promise<void> {
+   return RNLlama.releaseAllContexts()
+ }
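Note on the index.ts changes above: 1.6.0 makes getCpuFeatures() Android-only (other platforms now resolve to all-false features and log a warning), guards the module-load toggleNativeLog call with a catch, and additionally skips tokenizer.ggml.scores when reading model info. The following is a minimal consumer sketch against the 1.6.0 surface shown in the new file, assuming the package is installed as cui-llama.rn; the model path and prompt are hypothetical placeholders.

import { initLlama, getCpuFeatures, type TokenData } from 'cui-llama.rn'

async function demo(): Promise<void> {
  // Android-only since 1.6.0; on other platforms this resolves to
  // { i8mm: false, armv8: false, dotprod: false } and warns.
  const cpu = await getCpuFeatures()
  console.log('i8mm:', cpu.i8mm, 'dotprod:', cpu.dotprod)

  // initLlama strips the file:// prefix itself; load progress arrives
  // via the @RNLlama_onInitContextProgress event.
  const ctx = await initLlama(
    { model: 'file:///data/models/example.gguf' }, // hypothetical path
    (progress) => console.log('load progress:', progress),
  )

  // messages take precedence over prompt and are formatted natively;
  // the token callback receives partial results as they stream in.
  const result = await ctx.completion(
    { messages: [{ role: 'user', content: 'Hello!' }], n_predict: 32 },
    (data: TokenData) => console.log('token:', data.token),
  )
  console.log(result.text)

  await ctx.release()
}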