cui-llama.rn 1.5.0 → 1.6.0

This diff shows the changes between package versions as they were published to their respective public registries. It is generated from publicly available package contents and is provided for informational purposes only.
Files changed (309)
  1. package/LICENSE +20 -20
  2. package/README.md +317 -319
  3. package/android/build.gradle +116 -116
  4. package/android/gradle.properties +5 -5
  5. package/android/src/main/AndroidManifest.xml +4 -4
  6. package/android/src/main/CMakeLists.txt +124 -124
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
  8. package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
  10. package/android/src/main/jni-utils.h +100 -100
  11. package/android/src/main/jni.cpp +1263 -1263
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  20. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
  21. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
  22. package/cpp/README.md +4 -4
  23. package/cpp/ggml-llama-sim.metallib +0 -0
  24. package/cpp/ggml-llama.metallib +0 -0
  25. package/cpp/ggml-metal-impl.h +597 -597
  26. package/cpp/ggml-metal.m +4 -0
  27. package/cpp/ggml.h +1 -1
  28. package/cpp/rn-llama.cpp +873 -873
  29. package/cpp/rn-llama.h +138 -138
  30. package/cpp/sampling.h +107 -107
  31. package/cpp/unicode-data.cpp +7034 -7034
  32. package/cpp/unicode-data.h +20 -20
  33. package/cpp/unicode.cpp +849 -849
  34. package/cpp/unicode.h +66 -66
  35. package/ios/CMakeLists.txt +116 -108
  36. package/ios/RNLlama.h +7 -7
  37. package/ios/RNLlama.mm +418 -405
  38. package/ios/RNLlamaContext.h +57 -57
  39. package/ios/RNLlamaContext.mm +835 -835
  40. package/ios/rnllama.xcframework/Info.plist +74 -74
  41. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  42. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
  43. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
  44. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  45. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  46. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  47. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  48. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  49. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  50. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  51. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  52. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  53. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  54. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  55. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  56. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  57. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  58. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  59. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  60. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  61. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  62. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
  63. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  64. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  65. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  66. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  67. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  68. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  69. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  70. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  71. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  72. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  73. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  74. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  75. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  76. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  77. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  78. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  79. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  80. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  81. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  82. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  83. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  84. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
  85. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
  86. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  87. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  88. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
  89. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  90. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
  91. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  92. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  93. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
  94. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  95. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  96. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
  97. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  101. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  102. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  103. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  104. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  105. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  106. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  107. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  108. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  109. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  110. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  111. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  112. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  113. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  114. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  115. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  116. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  117. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  118. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  119. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  120. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  121. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  122. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  123. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  124. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  125. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  126. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  127. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  128. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  129. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  130. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  132. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  133. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  134. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  162. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  163. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
  164. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
  165. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  166. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  167. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  168. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  169. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  170. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  171. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  172. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  173. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  174. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  175. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  176. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  177. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  178. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  179. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  180. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  181. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  182. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  183. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
  184. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  185. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  188. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  189. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  190. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
  218. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  222. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  223. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  224. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  259. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  260. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  271. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  274. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  275. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  276. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  277. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  278. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  279. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  280. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  281. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  282. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  283. package/jest/mock.js +203 -203
  284. package/lib/commonjs/NativeRNLlama.js +1 -2
  285. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  286. package/lib/commonjs/chat.js.map +1 -1
  287. package/lib/commonjs/grammar.js +12 -31
  288. package/lib/commonjs/grammar.js.map +1 -1
  289. package/lib/commonjs/index.js +47 -47
  290. package/lib/commonjs/index.js.map +1 -1
  291. package/lib/commonjs/package.json +1 -0
  292. package/lib/module/NativeRNLlama.js +2 -0
  293. package/lib/module/NativeRNLlama.js.map +1 -1
  294. package/lib/module/chat.js +2 -0
  295. package/lib/module/chat.js.map +1 -1
  296. package/lib/module/grammar.js +14 -31
  297. package/lib/module/grammar.js.map +1 -1
  298. package/lib/module/index.js +47 -45
  299. package/lib/module/index.js.map +1 -1
  300. package/lib/module/package.json +1 -0
  301. package/lib/typescript/NativeRNLlama.d.ts +6 -4
  302. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  303. package/lib/typescript/index.d.ts.map +1 -1
  304. package/llama-rn.podspec +48 -48
  305. package/package.json +233 -233
  306. package/src/NativeRNLlama.ts +426 -426
  307. package/src/chat.ts +44 -44
  308. package/src/grammar.ts +854 -854
  309. package/src/index.ts +495 -487
package/src/index.ts CHANGED
@@ -1,487 +1,495 @@
- import { NativeEventEmitter, DeviceEventEmitter, Platform } from 'react-native'
- import type { DeviceEventEmitterStatic } from 'react-native'
- import RNLlama from './NativeRNLlama'
- import type {
-   NativeContextParams,
-   NativeLlamaContext,
-   NativeCompletionParams,
-   NativeCompletionTokenProb,
-   NativeCompletionResult,
-   NativeTokenizeResult,
-   NativeEmbeddingResult,
-   NativeSessionLoadResult,
-   NativeCPUFeatures,
-   NativeEmbeddingParams,
-   NativeCompletionTokenProbItem,
-   NativeCompletionResultTimings,
-   JinjaFormattedChatResult,
- } from './NativeRNLlama'
- import type {
-   SchemaGrammarConverterPropOrder,
-   SchemaGrammarConverterBuiltinRule,
- } from './grammar'
- import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
- import type { RNLlamaMessagePart, RNLlamaOAICompatibleMessage } from './chat'
- import { formatChat } from './chat'
-
- export type {
-   NativeContextParams,
-   NativeLlamaContext,
-   NativeCompletionParams,
-   NativeCompletionTokenProb,
-   NativeCompletionResult,
-   NativeTokenizeResult,
-   NativeEmbeddingResult,
-   NativeSessionLoadResult,
-   NativeEmbeddingParams,
-   NativeCompletionTokenProbItem,
-   NativeCompletionResultTimings,
-   RNLlamaMessagePart,
-   RNLlamaOAICompatibleMessage,
-   JinjaFormattedChatResult,
-
-   // Deprecated
-   SchemaGrammarConverterPropOrder,
-   SchemaGrammarConverterBuiltinRule,
- }
-
- export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
-
- const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress'
- const EVENT_ON_TOKEN = '@RNLlama_onToken'
- const EVENT_ON_NATIVE_LOG = '@RNLlama_onNativeLog'
-
- let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
- if (Platform.OS === 'ios') {
-   // @ts-ignore
-   EventEmitter = new NativeEventEmitter(RNLlama)
- }
- if (Platform.OS === 'android') {
-   EventEmitter = DeviceEventEmitter
- }
-
- const logListeners: Array<(level: string, text: string) => void> = []
-
- // @ts-ignore
- if (EventEmitter) {
-   EventEmitter.addListener(
-     EVENT_ON_NATIVE_LOG,
-     (evt: { level: string; text: string }) => {
-       logListeners.forEach((listener) => listener(evt.level, evt.text))
-     },
-   )
-   // Trigger unset to use default log callback
-   RNLlama?.toggleNativeLog?.(false)?.catch?.(() => {})
- }
-
- export type TokenData = {
-   token: string
-   completion_probabilities?: Array<NativeCompletionTokenProb>
- }
-
- type TokenNativeEvent = {
-   contextId: number
-   tokenResult: TokenData
- }
-
- export enum CACHE_TYPE {
-   F16 = 'f16',
-   F32 = 'f32',
-   Q8_0 = 'q8_0',
-   Q4_0 = 'q4_0',
-   Q4_1 = 'q4_1',
-   IQ4_NL = 'iq4_nl',
-   Q5_0 = 'q5_0',
-   Q5_1 = 'q5_1'
- }
-
-
- export type ContextParams = Omit<
-   NativeContextParams,
-   'cache_type_k' | 'cache_type_v' | 'pooling_type'
- > & {
-   cache_type_k?: CACHE_TYPE
-   cache_type_v?: CACHE_TYPE
-   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
- }
-
- export type EmbeddingParams = NativeEmbeddingParams
-
- export type CompletionResponseFormat = {
-   type: 'text' | 'json_object' | 'json_schema'
-   json_schema?: {
-     strict?: boolean
-     schema: object
-   }
-   schema?: object // for json_object type
- }
-
- export type CompletionBaseParams = {
-   prompt?: string
-   messages?: RNLlamaOAICompatibleMessage[]
-   chatTemplate?: string // deprecated
-   chat_template?: string
-   jinja?: boolean
-   tools?: object
-   parallel_tool_calls?: object
-   tool_choice?: string
-   response_format?: CompletionResponseFormat
- }
- export type CompletionParams = Omit<
-   NativeCompletionParams,
-   'emit_partial_completion' | 'prompt'
- > &
-   CompletionBaseParams
-
- export type BenchResult = {
-   modelDesc: string
-   modelSize: number
-   modelNParams: number
-   ppAvg: number
-   ppStd: number
-   tgAvg: number
-   tgStd: number
- }
-
- const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
-   if (responseFormat?.type === 'json_schema') {
-     return responseFormat.json_schema?.schema
-   }
-   if (responseFormat?.type === 'json_object') {
-     return responseFormat.schema || {}
-   }
-   return null
- }
-
- export class LlamaContext {
-   id: number
-
-   gpu: boolean = false
-
-   reasonNoGPU: string = ''
-
-   model: NativeLlamaContext['model']
-
-   constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext) {
-     this.id = contextId
-     this.gpu = gpu
-     this.reasonNoGPU = reasonNoGPU
-     this.model = model
-   }
-
-   /**
-    * Load cached prompt & completion state from a file.
-    */
-   async loadSession(filepath: string): Promise<NativeSessionLoadResult> {
-     let path = filepath
-     if (path.startsWith('file://')) path = path.slice(7)
-     return RNLlama.loadSession(this.id, path)
-   }
-
-   /**
-    * Save current cached prompt & completion state to a file.
-    */
-   async saveSession(
-     filepath: string,
-     options?: { tokenSize: number },
-   ): Promise<number> {
-     return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1)
-   }
-
-   isLlamaChatSupported(): boolean {
-     return !!this.model.chatTemplates.llamaChat
-   }
-
-   isJinjaSupported(): boolean {
-     const { minja } = this.model.chatTemplates
-     return !!minja?.toolUse || !!minja?.default
-   }
-
-   async getFormattedChat(
-     messages: RNLlamaOAICompatibleMessage[],
-     template?: string | null,
-     params?: {
-       jinja?: boolean
-       response_format?: CompletionResponseFormat
-       tools?: object
-       parallel_tool_calls?: object
-       tool_choice?: string
-     },
-   ): Promise<JinjaFormattedChatResult | string> {
-     const chat = formatChat(messages)
-     const useJinja = this.isJinjaSupported() && params?.jinja
-     let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
-     if (template) tmpl = template // Force replace if provided
-     const jsonSchema = getJsonSchema(params?.response_format)
-     return RNLlama.getFormattedChat(this.id, JSON.stringify(chat), tmpl, {
-       jinja: useJinja,
-       json_schema: jsonSchema ? JSON.stringify(jsonSchema) : undefined,
-       tools: params?.tools ? JSON.stringify(params.tools) : undefined,
-       parallel_tool_calls: params?.parallel_tool_calls
-         ? JSON.stringify(params.parallel_tool_calls)
-         : undefined,
-       tool_choice: params?.tool_choice,
-     })
-   }
-
-   async completion(
-     params: CompletionParams,
-     callback?: (data: TokenData) => void,
-   ): Promise<NativeCompletionResult> {
-     const nativeParams = {
-       ...params,
-       prompt: params.prompt || '',
-       emit_partial_completion: !!callback,
-     }
-     if (params.messages) {
-       // messages always win
-       const formattedResult = await this.getFormattedChat(
-         params.messages,
-         params.chat_template || params.chatTemplate,
-         {
-           jinja: params.jinja,
-           tools: params.tools,
-           parallel_tool_calls: params.parallel_tool_calls,
-           tool_choice: params.tool_choice,
-         },
-       )
-       if (typeof formattedResult === 'string') {
-         nativeParams.prompt = formattedResult || ''
-       } else {
-         nativeParams.prompt = formattedResult.prompt || ''
-         if (typeof formattedResult.chat_format === 'number')
-           nativeParams.chat_format = formattedResult.chat_format
-         if (formattedResult.grammar)
-           nativeParams.grammar = formattedResult.grammar
-         if (typeof formattedResult.grammar_lazy === 'boolean')
-           nativeParams.grammar_lazy = formattedResult.grammar_lazy
-         if (formattedResult.grammar_triggers)
-           nativeParams.grammar_triggers = formattedResult.grammar_triggers
-         if (formattedResult.preserved_tokens)
-           nativeParams.preserved_tokens = formattedResult.preserved_tokens
-         if (formattedResult.additional_stops) {
-           if (!nativeParams.stop) nativeParams.stop = []
-           nativeParams.stop.push(...formattedResult.additional_stops)
-         }
-       }
-     } else {
-       nativeParams.prompt = params.prompt || ''
-     }
-
-     if (nativeParams.response_format && !nativeParams.grammar) {
-       const jsonSchema = getJsonSchema(params.response_format)
-       if (jsonSchema) nativeParams.json_schema = JSON.stringify(jsonSchema)
-     }
-
-     let tokenListener: any =
-       callback &&
-       EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => {
-         const { contextId, tokenResult } = evt
-         if (contextId !== this.id) return
-         callback(tokenResult)
-       })
-
-     if (!nativeParams.prompt) throw new Error('Prompt is required')
-
-     const promise = RNLlama.completion(this.id, nativeParams)
-     return promise
-       .then((completionResult) => {
-         tokenListener?.remove()
-         tokenListener = null
-         return completionResult
-       })
-       .catch((err: any) => {
-         tokenListener?.remove()
-         tokenListener = null
-         throw err
-       })
-   }
-
-   stopCompletion(): Promise<void> {
-     return RNLlama.stopCompletion(this.id)
-   }
-
-   tokenizeAsync(text: string): Promise<NativeTokenizeResult> {
-     return RNLlama.tokenizeAsync(this.id, text)
-   }
-
-   tokenizeSync(text: string): NativeTokenizeResult {
-     return RNLlama.tokenizeSync(this.id, text)
-   }
-
-   detokenize(tokens: number[]): Promise<string> {
-     return RNLlama.detokenize(this.id, tokens)
-   }
-
-   embedding(
-     text: string,
-     params?: EmbeddingParams,
-   ): Promise<NativeEmbeddingResult> {
-     return RNLlama.embedding(this.id, text, params || {})
-   }
-
-   async bench(
-     pp: number,
-     tg: number,
-     pl: number,
-     nr: number,
-   ): Promise<BenchResult> {
-     const result = await RNLlama.bench(this.id, pp, tg, pl, nr)
-     const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] =
-       JSON.parse(result)
-     return {
-       modelDesc,
-       modelSize,
-       modelNParams,
-       ppAvg,
-       ppStd,
-       tgAvg,
-       tgStd,
-     }
-   }
-
-   async applyLoraAdapters(
-     loraList: Array<{ path: string; scaled?: number }>,
-   ): Promise<void> {
-     let loraAdapters: Array<{ path: string; scaled?: number }> = []
-     if (loraList)
-       loraAdapters = loraList.map((l) => ({
-         path: l.path.replace(/file:\/\//, ''),
-         scaled: l.scaled,
-       }))
-     return RNLlama.applyLoraAdapters(this.id, loraAdapters)
-   }
-
-   async removeLoraAdapters(): Promise<void> {
-     return RNLlama.removeLoraAdapters(this.id)
-   }
-
-   async getLoadedLoraAdapters(): Promise<
-     Array<{ path: string; scaled?: number }>
-   > {
-     return RNLlama.getLoadedLoraAdapters(this.id)
-   }
-
-   async release(): Promise<void> {
-     return RNLlama.releaseContext(this.id)
-   }
- }
-
- export async function getCpuFeatures() : Promise<NativeCPUFeatures> {
-   return RNLlama.getCpuFeatures()
- }
-
- export async function toggleNativeLog(enabled: boolean): Promise<void> {
-   return RNLlama.toggleNativeLog(enabled)
- }
-
- export function addNativeLogListener(
-   listener: (level: string, text: string) => void,
- ): { remove: () => void } {
-   logListeners.push(listener)
-   return {
-     remove: () => {
-       logListeners.splice(logListeners.indexOf(listener), 1)
-     },
-   }
- }
-
- export async function setContextLimit(limit: number): Promise<void> {
-   return RNLlama.setContextLimit(limit)
- }
-
- let contextIdCounter = 0
- const contextIdRandom = () =>
-   process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000)
-
- const modelInfoSkip = [
-   // Large fields
-   'tokenizer.ggml.tokens',
-   'tokenizer.ggml.token_type',
-   'tokenizer.ggml.merges',
-   'tokenizer.ggml.scores'
- ]
- export async function loadLlamaModelInfo(model: string): Promise<Object> {
-   let path = model
-   if (path.startsWith('file://')) path = path.slice(7)
-   return RNLlama.modelInfo(path, modelInfoSkip)
- }
-
- const poolTypeMap = {
-   // -1 is unspecified as undefined
-   none: 0,
-   mean: 1,
-   cls: 2,
-   last: 3,
-   rank: 4,
- }
-
- export async function initLlama(
-   {
-     model,
-     is_model_asset: isModelAsset,
-     pooling_type: poolingType,
-     lora,
-     lora_list: loraList,
-     ...rest
-   }: ContextParams,
-   onProgress?: (progress: number) => void,
- ): Promise<LlamaContext> {
-   let path = model
-   if (path.startsWith('file://')) path = path.slice(7)
-
-   let loraPath = lora
-   if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7)
-
-   let loraAdapters: Array<{ path: string; scaled?: number }> = []
-   if (loraList)
-     loraAdapters = loraList.map((l) => ({
-       path: l.path.replace(/file:\/\//, ''),
-       scaled: l.scaled,
-     }))
-
-   const contextId = contextIdCounter + contextIdRandom()
-   contextIdCounter += 1
-
-   let removeProgressListener: any = null
-   if (onProgress) {
-     removeProgressListener = EventEmitter.addListener(
-       EVENT_ON_INIT_CONTEXT_PROGRESS,
-       (evt: { contextId: number; progress: number }) => {
-         if (evt.contextId !== contextId) return
-         onProgress(evt.progress)
-       },
-     )
-   }
-
-   const poolType = poolTypeMap[poolingType as keyof typeof poolTypeMap]
-   const {
-     gpu,
-     reasonNoGPU,
-     model: modelDetails,
-     androidLib,
-   } = await RNLlama.initContext(contextId, {
-     model: path,
-     is_model_asset: !!isModelAsset,
-     use_progress_callback: !!onProgress,
-     pooling_type: poolType,
-     lora: loraPath,
-     lora_list: loraAdapters,
-     ...rest,
-   }).catch((err: any) => {
-     removeProgressListener?.remove()
-     throw err
-   })
-   removeProgressListener?.remove()
-   return new LlamaContext({
-     contextId,
-     gpu,
-     reasonNoGPU,
-     model: modelDetails,
-     androidLib,
-   })
- }
-
- export async function releaseAllLlama(): Promise<void> {
-   return RNLlama.releaseAllContexts()
- }
+ import { NativeEventEmitter, DeviceEventEmitter, Platform } from 'react-native'
+ import type { DeviceEventEmitterStatic } from 'react-native'
+ import RNLlama from './NativeRNLlama'
+ import type {
+   NativeContextParams,
+   NativeLlamaContext,
+   NativeCompletionParams,
+   NativeCompletionTokenProb,
+   NativeCompletionResult,
+   NativeTokenizeResult,
+   NativeEmbeddingResult,
+   NativeSessionLoadResult,
+   NativeCPUFeatures,
+   NativeEmbeddingParams,
+   NativeCompletionTokenProbItem,
+   NativeCompletionResultTimings,
+   JinjaFormattedChatResult,
+ } from './NativeRNLlama'
+ import type {
+   SchemaGrammarConverterPropOrder,
+   SchemaGrammarConverterBuiltinRule,
+ } from './grammar'
+ import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
+ import type { RNLlamaMessagePart, RNLlamaOAICompatibleMessage } from './chat'
+ import { formatChat } from './chat'
+
+ export type {
+   NativeContextParams,
+   NativeLlamaContext,
+   NativeCompletionParams,
+   NativeCompletionTokenProb,
+   NativeCompletionResult,
+   NativeTokenizeResult,
+   NativeEmbeddingResult,
+   NativeSessionLoadResult,
+   NativeEmbeddingParams,
+   NativeCompletionTokenProbItem,
+   NativeCompletionResultTimings,
+   RNLlamaMessagePart,
+   RNLlamaOAICompatibleMessage,
+   JinjaFormattedChatResult,
+
+   // Deprecated
+   SchemaGrammarConverterPropOrder,
+   SchemaGrammarConverterBuiltinRule,
+ }
+
+ export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
+
+ const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress'
+ const EVENT_ON_TOKEN = '@RNLlama_onToken'
+ const EVENT_ON_NATIVE_LOG = '@RNLlama_onNativeLog'
+
+ let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
+ if (Platform.OS === 'ios') {
+   // @ts-ignore
+   EventEmitter = new NativeEventEmitter(RNLlama)
+ }
+ if (Platform.OS === 'android') {
+   EventEmitter = DeviceEventEmitter
+ }
+
+ const logListeners: Array<(level: string, text: string) => void> = []
+
+ // @ts-ignore
+ if (EventEmitter) {
+   EventEmitter.addListener(
+     EVENT_ON_NATIVE_LOG,
+     (evt: { level: string; text: string }) => {
+       logListeners.forEach((listener) => listener(evt.level, evt.text))
+     },
+   )
+   // Trigger unset to use default log callback
+   RNLlama?.toggleNativeLog?.(false)?.catch?.(() => {})
+ }
+
+ export type TokenData = {
+   token: string
+   completion_probabilities?: Array<NativeCompletionTokenProb>
+ }
+
+ type TokenNativeEvent = {
+   contextId: number
+   tokenResult: TokenData
+ }
+
+ export enum CACHE_TYPE {
+   F16 = 'f16',
+   F32 = 'f32',
+   Q8_0 = 'q8_0',
+   Q4_0 = 'q4_0',
+   Q4_1 = 'q4_1',
+   IQ4_NL = 'iq4_nl',
+   Q5_0 = 'q5_0',
+   Q5_1 = 'q5_1'
+ }
+
+
+ export type ContextParams = Omit<
+   NativeContextParams,
+   'cache_type_k' | 'cache_type_v' | 'pooling_type'
+ > & {
+   cache_type_k?: CACHE_TYPE
+   cache_type_v?: CACHE_TYPE
+   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
+ }
+
+ export type EmbeddingParams = NativeEmbeddingParams
+
+ export type CompletionResponseFormat = {
+   type: 'text' | 'json_object' | 'json_schema'
+   json_schema?: {
+     strict?: boolean
+     schema: object
+   }
+   schema?: object // for json_object type
+ }
+
+ export type CompletionBaseParams = {
+   prompt?: string
+   messages?: RNLlamaOAICompatibleMessage[]
+   chatTemplate?: string // deprecated
+   chat_template?: string
+   jinja?: boolean
+   tools?: object
+   parallel_tool_calls?: object
+   tool_choice?: string
+   response_format?: CompletionResponseFormat
+ }
+ export type CompletionParams = Omit<
+   NativeCompletionParams,
+   'emit_partial_completion' | 'prompt'
+ > &
+   CompletionBaseParams
+
+ export type BenchResult = {
+   modelDesc: string
+   modelSize: number
+   modelNParams: number
+   ppAvg: number
+   ppStd: number
+   tgAvg: number
+   tgStd: number
+ }
+
+ const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
+   if (responseFormat?.type === 'json_schema') {
+     return responseFormat.json_schema?.schema
+   }
+   if (responseFormat?.type === 'json_object') {
+     return responseFormat.schema || {}
+   }
+   return null
+ }
+
+ export class LlamaContext {
+   id: number
+
+   gpu: boolean = false
+
+   reasonNoGPU: string = ''
+
+   model: NativeLlamaContext['model']
+
+   constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext) {
+     this.id = contextId
+     this.gpu = gpu
+     this.reasonNoGPU = reasonNoGPU
+     this.model = model
+   }
+
+   /**
+    * Load cached prompt & completion state from a file.
+    */
+   async loadSession(filepath: string): Promise<NativeSessionLoadResult> {
+     let path = filepath
+     if (path.startsWith('file://')) path = path.slice(7)
+     return RNLlama.loadSession(this.id, path)
+   }
+
+   /**
+    * Save current cached prompt & completion state to a file.
+    */
+   async saveSession(
+     filepath: string,
+     options?: { tokenSize: number },
+   ): Promise<number> {
+     return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1)
+   }
+
+   isLlamaChatSupported(): boolean {
+     return !!this.model.chatTemplates.llamaChat
+   }
+
+   isJinjaSupported(): boolean {
+     const { minja } = this.model.chatTemplates
+     return !!minja?.toolUse || !!minja?.default
+   }
+
+   async getFormattedChat(
+     messages: RNLlamaOAICompatibleMessage[],
+     template?: string | null,
+     params?: {
+       jinja?: boolean
+       response_format?: CompletionResponseFormat
+       tools?: object
+       parallel_tool_calls?: object
+       tool_choice?: string
+     },
+   ): Promise<JinjaFormattedChatResult | string> {
+     const chat = formatChat(messages)
+     const useJinja = this.isJinjaSupported() && params?.jinja
+     let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
+     if (template) tmpl = template // Force replace if provided
+     const jsonSchema = getJsonSchema(params?.response_format)
+     return RNLlama.getFormattedChat(this.id, JSON.stringify(chat), tmpl, {
+       jinja: useJinja,
+       json_schema: jsonSchema ? JSON.stringify(jsonSchema) : undefined,
+       tools: params?.tools ? JSON.stringify(params.tools) : undefined,
+       parallel_tool_calls: params?.parallel_tool_calls
+         ? JSON.stringify(params.parallel_tool_calls)
+         : undefined,
+       tool_choice: params?.tool_choice,
+     })
+   }
+
+   async completion(
+     params: CompletionParams,
+     callback?: (data: TokenData) => void,
+   ): Promise<NativeCompletionResult> {
+     const nativeParams = {
+       ...params,
+       prompt: params.prompt || '',
+       emit_partial_completion: !!callback,
+     }
+     if (params.messages) {
+       // messages always win
+       const formattedResult = await this.getFormattedChat(
+         params.messages,
+         params.chat_template || params.chatTemplate,
+         {
+           jinja: params.jinja,
+           tools: params.tools,
+           parallel_tool_calls: params.parallel_tool_calls,
+           tool_choice: params.tool_choice,
+         },
+       )
+       if (typeof formattedResult === 'string') {
+         nativeParams.prompt = formattedResult || ''
+       } else {
+         nativeParams.prompt = formattedResult.prompt || ''
+         if (typeof formattedResult.chat_format === 'number')
+           nativeParams.chat_format = formattedResult.chat_format
+         if (formattedResult.grammar)
+           nativeParams.grammar = formattedResult.grammar
+         if (typeof formattedResult.grammar_lazy === 'boolean')
+           nativeParams.grammar_lazy = formattedResult.grammar_lazy
+         if (formattedResult.grammar_triggers)
+           nativeParams.grammar_triggers = formattedResult.grammar_triggers
+         if (formattedResult.preserved_tokens)
+           nativeParams.preserved_tokens = formattedResult.preserved_tokens
+         if (formattedResult.additional_stops) {
+           if (!nativeParams.stop) nativeParams.stop = []
+           nativeParams.stop.push(...formattedResult.additional_stops)
+         }
+       }
+     } else {
+       nativeParams.prompt = params.prompt || ''
+     }
+
+     if (nativeParams.response_format && !nativeParams.grammar) {
+       const jsonSchema = getJsonSchema(params.response_format)
+       if (jsonSchema) nativeParams.json_schema = JSON.stringify(jsonSchema)
+     }
+
+     let tokenListener: any =
+       callback &&
+       EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => {
+         const { contextId, tokenResult } = evt
+         if (contextId !== this.id) return
+         callback(tokenResult)
+       })
+
+     if (!nativeParams.prompt) throw new Error('Prompt is required')
+
+     const promise = RNLlama.completion(this.id, nativeParams)
+     return promise
+       .then((completionResult) => {
+         tokenListener?.remove()
+         tokenListener = null
+         return completionResult
+       })
+       .catch((err: any) => {
+         tokenListener?.remove()
+         tokenListener = null
+         throw err
+       })
+   }
+
+   stopCompletion(): Promise<void> {
+     return RNLlama.stopCompletion(this.id)
+   }
+
+   tokenizeAsync(text: string): Promise<NativeTokenizeResult> {
+     return RNLlama.tokenizeAsync(this.id, text)
+   }
+
+   tokenizeSync(text: string): NativeTokenizeResult {
+     return RNLlama.tokenizeSync(this.id, text)
+   }
+
+   detokenize(tokens: number[]): Promise<string> {
+     return RNLlama.detokenize(this.id, tokens)
+   }
+
+   embedding(
+     text: string,
+     params?: EmbeddingParams,
+   ): Promise<NativeEmbeddingResult> {
+     return RNLlama.embedding(this.id, text, params || {})
+   }
+
+   async bench(
+     pp: number,
+     tg: number,
+     pl: number,
+     nr: number,
+   ): Promise<BenchResult> {
+     const result = await RNLlama.bench(this.id, pp, tg, pl, nr)
+     const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] =
+       JSON.parse(result)
+     return {
+       modelDesc,
+       modelSize,
+       modelNParams,
+       ppAvg,
+       ppStd,
+       tgAvg,
+       tgStd,
+     }
+   }
+
+   async applyLoraAdapters(
+     loraList: Array<{ path: string; scaled?: number }>,
+   ): Promise<void> {
+     let loraAdapters: Array<{ path: string; scaled?: number }> = []
+     if (loraList)
+       loraAdapters = loraList.map((l) => ({
+         path: l.path.replace(/file:\/\//, ''),
+         scaled: l.scaled,
+       }))
+     return RNLlama.applyLoraAdapters(this.id, loraAdapters)
+   }
+
+   async removeLoraAdapters(): Promise<void> {
+     return RNLlama.removeLoraAdapters(this.id)
+   }
+
+   async getLoadedLoraAdapters(): Promise<
+     Array<{ path: string; scaled?: number }>
+   > {
+     return RNLlama.getLoadedLoraAdapters(this.id)
+   }
+
+   async release(): Promise<void> {
+     return RNLlama.releaseContext(this.id)
+   }
+ }
+
+ export async function getCpuFeatures() : Promise<NativeCPUFeatures> {
+   if(Platform.OS === 'android') {
+     return RNLlama.getCpuFeatures()
+   }
+   console.warn("getCpuFeatures() is an android only feature")
+   return {
+     i8mm: false,
+     armv8: false,
+     dotprod: false,
+   }
+ }
+
+ export async function toggleNativeLog(enabled: boolean): Promise<void> {
+   return RNLlama.toggleNativeLog(enabled)
+ }
+
+ export function addNativeLogListener(
+   listener: (level: string, text: string) => void,
+ ): { remove: () => void } {
+   logListeners.push(listener)
+   return {
+     remove: () => {
+       logListeners.splice(logListeners.indexOf(listener), 1)
+     },
+   }
+ }
+
+ export async function setContextLimit(limit: number): Promise<void> {
+   return RNLlama.setContextLimit(limit)
+ }
+
+ let contextIdCounter = 0
+ const contextIdRandom = () =>
+   process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000)
+
+ const modelInfoSkip = [
+   // Large fields
+   'tokenizer.ggml.tokens',
+   'tokenizer.ggml.token_type',
+   'tokenizer.ggml.merges',
+   'tokenizer.ggml.scores'
+ ]
+ export async function loadLlamaModelInfo(model: string): Promise<Object> {
+   let path = model
+   if (path.startsWith('file://')) path = path.slice(7)
+   return RNLlama.modelInfo(path, modelInfoSkip)
+ }
+
+ const poolTypeMap = {
+   // -1 is unspecified as undefined
+   none: 0,
+   mean: 1,
+   cls: 2,
+   last: 3,
+   rank: 4,
+ }
+
+ export async function initLlama(
+   {
+     model,
+     is_model_asset: isModelAsset,
+     pooling_type: poolingType,
+     lora,
+     lora_list: loraList,
+     ...rest
+   }: ContextParams,
+   onProgress?: (progress: number) => void,
+ ): Promise<LlamaContext> {
+   let path = model
+   if (path.startsWith('file://')) path = path.slice(7)
+
+   let loraPath = lora
+   if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7)
+
+   let loraAdapters: Array<{ path: string; scaled?: number }> = []
+   if (loraList)
+     loraAdapters = loraList.map((l) => ({
+       path: l.path.replace(/file:\/\//, ''),
+       scaled: l.scaled,
+     }))
+
+   const contextId = contextIdCounter + contextIdRandom()
+   contextIdCounter += 1
+
+   let removeProgressListener: any = null
+   if (onProgress) {
+     removeProgressListener = EventEmitter.addListener(
+       EVENT_ON_INIT_CONTEXT_PROGRESS,
+       (evt: { contextId: number; progress: number }) => {
+         if (evt.contextId !== contextId) return
+         onProgress(evt.progress)
+       },
+     )
+   }
+
+   const poolType = poolTypeMap[poolingType as keyof typeof poolTypeMap]
+   const {
+     gpu,
+     reasonNoGPU,
+     model: modelDetails,
+     androidLib,
+   } = await RNLlama.initContext(contextId, {
+     model: path,
+     is_model_asset: !!isModelAsset,
+     use_progress_callback: !!onProgress,
+     pooling_type: poolType,
+     lora: loraPath,
+     lora_list: loraAdapters,
+     ...rest,
+   }).catch((err: any) => {
+     removeProgressListener?.remove()
+     throw err
+   })
+   removeProgressListener?.remove()
+   return new LlamaContext({
+     contextId,
+     gpu,
+     reasonNoGPU,
+     model: modelDetails,
+     androidLib,
+   })
+ }
+
+ export async function releaseAllLlama(): Promise<void> {
+   return RNLlama.releaseAllContexts()
+ }
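
The one functional change in the index.ts diff above is getCpuFeatures(): in 1.6.0 it calls into the native module only on Android, and on other platforms it warns and resolves with all-false feature flags. Below is a minimal consumer-side sketch of how that 1.6.0 behavior might be used; the initWithBestBackend helper and the n_batch tuning choice are illustrative assumptions, not part of the package.

import { getCpuFeatures, initLlama } from 'cui-llama.rn'

// Sketch: pick context parameters from the reported CPU features.
// On non-Android platforms, 1.6.0 warns and resolves
// { i8mm: false, armv8: false, dotprod: false }, so the conservative
// branch below is taken there by design.
async function initWithBestBackend(modelPath: string) {
  const features = await getCpuFeatures()
  // Hypothetical tuning choice: use a larger batch size when i8mm is reported.
  const n_batch = features.i8mm ? 512 : 128
  return initLlama({ model: modelPath, n_batch })
}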