cui-llama.rn 1.4.6 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (366)
  1. package/LICENSE +20 -20
  2. package/README.md +317 -319
  3. package/android/build.gradle +116 -116
  4. package/android/gradle.properties +5 -5
  5. package/android/src/main/AndroidManifest.xml +4 -4
  6. package/android/src/main/CMakeLists.txt +124 -117
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
  8. package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
  10. package/android/src/main/jni-utils.h +100 -100
  11. package/android/src/main/jni.cpp +1263 -1245
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  20. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
  21. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
  22. package/cpp/README.md +4 -4
  23. package/cpp/binary-ops.cpp +158 -0
  24. package/cpp/binary-ops.h +16 -0
  25. package/cpp/chat.cpp +1769 -1779
  26. package/cpp/chat.h +9 -1
  27. package/cpp/common.cpp +20 -522
  28. package/cpp/common.h +13 -36
  29. package/cpp/cpu-common.h +72 -0
  30. package/cpp/ggml-common.h +12 -6
  31. package/cpp/ggml-cpu-aarch64.cpp +1557 -80
  32. package/cpp/ggml-cpu-impl.h +2 -21
  33. package/cpp/ggml-cpu-quants.c +904 -405
  34. package/cpp/ggml-cpu.c +909 -13237
  35. package/cpp/ggml-impl.h +50 -23
  36. package/cpp/ggml-llama-sim.metallib +0 -0
  37. package/cpp/ggml-llama.metallib +0 -0
  38. package/cpp/ggml-metal-impl.h +597 -523
  39. package/cpp/ggml-metal.m +798 -580
  40. package/cpp/ggml.c +92 -3
  41. package/cpp/ggml.h +30 -6
  42. package/cpp/gguf.cpp +1 -0
  43. package/cpp/llama-adapter.cpp +55 -20
  44. package/cpp/llama-adapter.h +11 -9
  45. package/cpp/llama-arch.cpp +217 -16
  46. package/cpp/llama-arch.h +25 -0
  47. package/cpp/llama-batch.h +2 -2
  48. package/cpp/llama-chat.cpp +54 -2
  49. package/cpp/llama-chat.h +3 -0
  50. package/cpp/llama-context.cpp +2294 -1238
  51. package/cpp/llama-context.h +214 -77
  52. package/cpp/llama-cparams.h +1 -0
  53. package/cpp/llama-graph.cpp +1695 -0
  54. package/cpp/llama-graph.h +592 -0
  55. package/cpp/llama-hparams.cpp +8 -0
  56. package/cpp/llama-hparams.h +17 -0
  57. package/cpp/llama-io.cpp +15 -0
  58. package/cpp/llama-io.h +35 -0
  59. package/cpp/llama-kv-cache.cpp +965 -303
  60. package/cpp/llama-kv-cache.h +145 -151
  61. package/cpp/llama-memory.cpp +1 -0
  62. package/cpp/llama-memory.h +21 -0
  63. package/cpp/llama-mmap.cpp +1 -1
  64. package/cpp/llama-model-loader.cpp +10 -5
  65. package/cpp/llama-model-loader.h +5 -3
  66. package/cpp/llama-model.cpp +9194 -201
  67. package/cpp/llama-model.h +40 -1
  68. package/cpp/llama-sampling.cpp +5 -0
  69. package/cpp/llama-vocab.cpp +36 -5
  70. package/cpp/llama.cpp +51 -9984
  71. package/cpp/llama.h +102 -22
  72. package/cpp/log.cpp +34 -0
  73. package/cpp/minja/chat-template.hpp +15 -7
  74. package/cpp/minja/minja.hpp +120 -94
  75. package/cpp/ops.cpp +8723 -0
  76. package/cpp/ops.h +128 -0
  77. package/cpp/rn-llama.cpp +873 -882
  78. package/cpp/rn-llama.h +138 -148
  79. package/cpp/sampling.cpp +3 -0
  80. package/cpp/sampling.h +107 -107
  81. package/cpp/sgemm.cpp +533 -88
  82. package/cpp/simd-mappings.h +888 -0
  83. package/cpp/speculative.cpp +4 -4
  84. package/cpp/unary-ops.cpp +186 -0
  85. package/cpp/unary-ops.h +28 -0
  86. package/cpp/unicode-data.cpp +7034 -7034
  87. package/cpp/unicode-data.h +20 -20
  88. package/cpp/unicode.cpp +849 -849
  89. package/cpp/unicode.h +66 -66
  90. package/cpp/vec.cpp +258 -0
  91. package/cpp/vec.h +802 -0
  92. package/ios/CMakeLists.txt +116 -105
  93. package/ios/RNLlama.h +7 -7
  94. package/ios/RNLlama.mm +418 -405
  95. package/ios/RNLlamaContext.h +57 -57
  96. package/ios/RNLlamaContext.mm +835 -819
  97. package/ios/rnllama.xcframework/Info.plist +74 -74
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
  101. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  102. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
  143. package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/chat-template.hpp +15 -7
  144. package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/minja.hpp +120 -94
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  184. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  191. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  192. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  193. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  194. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  195. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  196. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  197. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  198. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  199. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  200. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  201. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  202. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  203. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  204. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  205. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  206. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  207. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  208. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  209. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  210. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  211. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  212. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  213. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  214. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  215. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  216. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  217. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  218. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  225. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  226. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  227. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  228. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  229. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  230. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  231. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  232. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  233. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  234. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  235. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  236. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  237. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  238. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  239. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  240. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
  241. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  242. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  243. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  244. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  245. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  246. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  247. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  248. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  249. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  250. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  251. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  261. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  262. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
  263. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
  264. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  265. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  266. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
  267. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  268. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
  269. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  270. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
  272. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  273. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  274. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
  275. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
  276. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  277. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  278. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  279. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  280. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  281. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  282. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  283. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  284. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  285. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  286. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  287. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  288. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  289. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  290. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  291. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  292. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  293. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  294. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  295. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  296. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  297. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  298. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  299. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  300. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  301. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  302. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  303. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  304. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  305. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  306. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  307. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  308. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  309. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  310. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  311. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  312. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  313. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  314. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  315. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  316. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  317. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  318. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  319. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  320. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  321. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  322. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  323. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  324. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  325. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  326. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  327. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  328. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  329. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  330. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  331. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  332. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  333. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  334. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  335. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  336. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  337. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  338. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  339. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  340. package/jest/mock.js +203 -203
  341. package/lib/commonjs/NativeRNLlama.js +1 -2
  342. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  343. package/lib/commonjs/chat.js.map +1 -1
  344. package/lib/commonjs/grammar.js +12 -31
  345. package/lib/commonjs/grammar.js.map +1 -1
  346. package/lib/commonjs/index.js +47 -47
  347. package/lib/commonjs/index.js.map +1 -1
  348. package/lib/commonjs/package.json +1 -0
  349. package/lib/module/NativeRNLlama.js +2 -0
  350. package/lib/module/NativeRNLlama.js.map +1 -1
  351. package/lib/module/chat.js +2 -0
  352. package/lib/module/chat.js.map +1 -1
  353. package/lib/module/grammar.js +14 -31
  354. package/lib/module/grammar.js.map +1 -1
  355. package/lib/module/index.js +47 -45
  356. package/lib/module/index.js.map +1 -1
  357. package/lib/module/package.json +1 -0
  358. package/lib/typescript/NativeRNLlama.d.ts +6 -4
  359. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  360. package/lib/typescript/index.d.ts.map +1 -1
  361. package/llama-rn.podspec +48 -48
  362. package/package.json +233 -233
  363. package/src/NativeRNLlama.ts +426 -424
  364. package/src/chat.ts +44 -44
  365. package/src/grammar.ts +854 -854
  366. package/src/index.ts +495 -485
@@ -1,424 +1,426 @@
 import type { TurboModule } from 'react-native'
 import { TurboModuleRegistry } from 'react-native'

 export type NativeEmbeddingParams = {
   embd_normalize?: number
 }

 export type NativeContextParams = {
   model: string
   /**
    * Chat template to override the default one from the model.
    */
   chat_template?: string

   reasoning_format?: string

   is_model_asset?: boolean
   use_progress_callback?: boolean

   n_ctx?: number
   n_batch?: number
   n_ubatch?: number

   n_threads?: number

   /**
    * Number of layers to store in VRAM (currently iOS only)
    */
   n_gpu_layers?: number
   /**
    * Skip GPU devices (iOS only)
    */
   no_gpu_devices?: boolean

   /**
    * Enable flash attention; only recommended on GPU devices (experimental in llama.cpp)
    */
   flash_attn?: boolean

   /**
    * KV cache data type for the K (experimental in llama.cpp)
    */
   cache_type_k?: string
   /**
    * KV cache data type for the V (experimental in llama.cpp)
    */
   cache_type_v?: string

   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean

   /**
    * Single LoRA adapter path
    */
   lora?: string
   /**
    * Single LoRA adapter scale
    */
   lora_scaled?: number
   /**
    * LoRA adapter list
    */
   lora_list?: Array<{ path: string; scaled?: number }>

   rope_freq_base?: number
   rope_freq_scale?: number

   pooling_type?: number

   // Embedding params
   embedding?: boolean
   embd_normalize?: number
 }

 export type NativeCompletionParams = {
   prompt: string
   n_threads?: number
   /**
    * JSON schema to convert to a grammar for structured JSON output.
    * It will be overridden by grammar if both are set.
    */
   json_schema?: string
   /**
    * Set grammar for grammar-based sampling. Default: no grammar
    */
   grammar?: string
   /**
    * Lazy grammar sampling, triggered by grammar_triggers. Default: false
    */
   grammar_lazy?: boolean
   /**
    * Lazy grammar triggers. Default: []
    */
   grammar_triggers?: Array<{
-    at_start: boolean
-    word: string
+    type: number
+    value: string
+    token: number
   }>
   preserved_tokens?: Array<string>
   chat_format?: number
   /**
    * Specify a JSON array of stopping strings.
    * These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: `[]`
    */
   stop?: Array<string>
   /**
    * Set the maximum number of tokens to predict when generating text.
    * **Note:** May exceed the set limit slightly if the last token is a partial multibyte character.
    * When 0, no tokens will be generated but the prompt is evaluated into the cache. Default: `-1`, where `-1` is infinity.
    */
   n_predict?: number
   /**
    * If greater than 0, the response also contains the probabilities of top N tokens for each generated token given the sampling settings.
    * Note that for temperature < 0 the tokens are sampled greedily but token probabilities are still being calculated via a simple softmax of the logits without considering any other sampler settings.
    * Default: `0`
    */
   n_probs?: number
   /**
    * Limit the next token selection to the K most probable tokens. Default: `40`
    */
   top_k?: number
   /**
    * Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P. Default: `0.95`
    */
   top_p?: number
   /**
    * The minimum probability for a token to be considered, relative to the probability of the most likely token. Default: `0.05`
    */
   min_p?: number
   /**
    * Set the chance for token removal via XTC sampler. Default: `0.0`, which is disabled.
    */
   xtc_probability?: number
   /**
    * Set a minimum probability threshold for tokens to be removed via XTC sampler. Default: `0.1` (> `0.5` disables XTC)
    */
   xtc_threshold?: number
   /**
    * Enable locally typical sampling with parameter p. Default: `1.0`, which is disabled.
    */
   typical_p?: number
   /**
    * Adjust the randomness of the generated text. Default: `0.8`
    */
   temperature?: number
   /**
    * Last n tokens to consider for penalizing repetition. Default: `64`, where `0` is disabled and `-1` is ctx-size.
    */
   penalty_last_n?: number
   /**
    * Control the repetition of token sequences in the generated text. Default: `1.0`
    */
   penalty_repeat?: number
   /**
    * Repeat alpha frequency penalty. Default: `0.0`, which is disabled.
    */
   penalty_freq?: number
   /**
    * Repeat alpha presence penalty. Default: `0.0`, which is disabled.
    */
   penalty_present?: number
   /**
    * Enable Mirostat sampling, controlling perplexity during text generation. Default: `0`, where `0` is disabled, `1` is Mirostat, and `2` is Mirostat 2.0.
    */
   mirostat?: number
   /**
    * Set the Mirostat target entropy, parameter tau. Default: `5.0`
    */
   mirostat_tau?: number
   /**
    * Set the Mirostat learning rate, parameter eta. Default: `0.1`
    */
   mirostat_eta?: number
   /**
    * Set the DRY (Don't Repeat Yourself) repetition penalty multiplier. Default: `0.0`, which is disabled.
    */
   dry_multiplier?: number
   /**
    * Set the DRY repetition penalty base value. Default: `1.75`
    */
   dry_base?: number
   /**
    * Tokens that extend repetition beyond this receive exponentially increasing penalty: multiplier * base ^ (length of repeating sequence before token - allowed length). Default: `2`
    */
   dry_allowed_length?: number
   /**
    * How many tokens to scan for repetitions. Default: `-1`, where `0` is disabled and `-1` is context size.
    */
   dry_penalty_last_n?: number
   /**
    * Specify an array of sequence breakers for DRY sampling. Only a JSON array of strings is accepted. Default: `['\n', ':', '"', '*']`
    */
   dry_sequence_breakers?: Array<string>
   /**
    * Top n sigma sampling as described in the paper "Top-nσ: Not All Logits Are You Need" (https://arxiv.org/pdf/2411.07641). Default: `-1.0` (disabled)
    */
   top_n_sigma?: number

   /**
    * Ignore end of stream token and continue generating. Default: `false`
    */
   ignore_eos?: boolean
   /**
    * Modify the likelihood of a token appearing in the generated text completion.
    * For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood.
    * Setting the value to false, `"logit_bias": [[15043,false]]`, ensures that the token `Hello` is never produced. The tokens can also be represented as strings,
    * e.g. `[["Hello, World!",-0.5]]` will reduce the likelihood of all the individual tokens that represent the string `Hello, World!`, just like the `presence_penalty` does.
    * Default: `[]`
    */
   logit_bias?: Array<Array<number>>
   /**
    * Set the random number generator (RNG) seed. Default: `-1`, which is a random seed.
    */
   seed?: number

   emit_partial_completion: boolean
 }

 export type NativeCompletionTokenProbItem = {
   tok_str: string
   prob: number
 }

 export type NativeCompletionTokenProb = {
   content: string
   probs: Array<NativeCompletionTokenProbItem>
 }

 export type NativeCompletionResultTimings = {
   prompt_n: number
   prompt_ms: number
   prompt_per_token_ms: number
   prompt_per_second: number
   predicted_n: number
   predicted_ms: number
   predicted_per_token_ms: number
   predicted_per_second: number
 }

 export type NativeCompletionResult = {
   /**
    * Original text (ignores reasoning_content / tool_calls)
    */
   text: string
   /**
    * Reasoning content (parsed for reasoning models)
    */
   reasoning_content: string
   /**
    * Tool calls
    */
   tool_calls: Array<{
     type: 'function'
     function: {
       name: string
       arguments: string
     }
     id?: string
   }>
   /**
    * Content text (text with reasoning_content / tool_calls filtered out)
    */
   content: string

   tokens_predicted: number
   tokens_evaluated: number
   truncated: boolean
   stopped_eos: boolean
   stopped_word: string
   stopped_limit: number
   stopping_word: string
   tokens_cached: number
   timings: NativeCompletionResultTimings

   completion_probabilities?: Array<NativeCompletionTokenProb>
 }

 export type NativeTokenizeResult = {
   tokens: Array<number>
 }

 export type NativeEmbeddingResult = {
   embedding: Array<number>
 }

 export type NativeLlamaContext = {
   contextId: number
   model: {
     desc: string
     size: number
     nEmbd: number
     nParams: number
     chatTemplates: {
       llamaChat: boolean // Chat template in llama-chat.cpp
       minja: {
         // Chat template supported by minja.hpp
         default: boolean
         defaultCaps: {
           tools: boolean
           toolCalls: boolean
           toolResponses: boolean
           systemRole: boolean
           parallelToolCalls: boolean
           toolCallId: boolean
         }
         toolUse: boolean
         toolUseCaps: {
           tools: boolean
           toolCalls: boolean
           toolResponses: boolean
           systemRole: boolean
           parallelToolCalls: boolean
           toolCallId: boolean
         }
       }
     }
     metadata: Object
     isChatTemplateSupported: boolean // Deprecated
   }
   /**
    * Loaded library name for Android
    */
   androidLib?: string
   gpu: boolean
   reasonNoGPU: string
 }

 export type NativeSessionLoadResult = {
   tokens_loaded: number
   prompt: string
 }

 export type NativeLlamaChatMessage = {
   role: string
   content: string
 }

 export type NativeCPUFeatures = {
   armv8: boolean
   i8mm: boolean
   dotprod: boolean
 }

 export type JinjaFormattedChatResult = {
   prompt: string
   chat_format?: number
   grammar?: string
   grammar_lazy?: boolean
   grammar_triggers?: Array<{
-    at_start: boolean
-    word: string
+    type: number
+    value: string
+    token: number
   }>
   preserved_tokens?: Array<string>
   additional_stops?: Array<string>
 }

 export interface Spec extends TurboModule {
   toggleNativeLog(enabled: boolean): Promise<void>
   setContextLimit(limit: number): Promise<void>

   modelInfo(path: string, skip?: string[]): Promise<Object>
   initContext(
     contextId: number,
     params: NativeContextParams,
   ): Promise<NativeLlamaContext>

   getFormattedChat(
     contextId: number,
     messages: string,
     chatTemplate?: string,
     params?: {
       jinja?: boolean
       json_schema?: string
       tools?: string
       parallel_tool_calls?: string
       tool_choice?: string
     },
   ): Promise<JinjaFormattedChatResult | string>
   loadSession(
     contextId: number,
     filepath: string,
   ): Promise<NativeSessionLoadResult>
   saveSession(
     contextId: number,
     filepath: string,
     size: number,
   ): Promise<number>
   completion(
     contextId: number,
     params: NativeCompletionParams,
   ): Promise<NativeCompletionResult>
   stopCompletion(contextId: number): Promise<void>
   tokenizeAsync(contextId: number, text: string): Promise<NativeTokenizeResult>
   tokenizeSync(contextId: number, text: string): NativeTokenizeResult
   getCpuFeatures(): Promise<NativeCPUFeatures>
   detokenize(contextId: number, tokens: number[]): Promise<string>
   embedding(
     contextId: number,
     text: string,
     params: NativeEmbeddingParams,
   ): Promise<NativeEmbeddingResult>
   bench(
     contextId: number,
     pp: number,
     tg: number,
     pl: number,
     nr: number,
   ): Promise<string>

   applyLoraAdapters(
     contextId: number,
     loraAdapters: Array<{ path: string; scaled?: number }>,
   ): Promise<void>
   removeLoraAdapters(contextId: number): Promise<void>
   getLoadedLoraAdapters(
     contextId: number,
   ): Promise<Array<{ path: string; scaled?: number }>>

   releaseContext(contextId: number): Promise<void>

   releaseAllContexts(): Promise<void>
 }

 export default TurboModuleRegistry.get<Spec>('RNLlama') as Spec
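The only substantive change to `package/src/NativeRNLlama.ts` in this range is the element shape of `grammar_triggers`, which moves from a `{ at_start, word }` literal match to a `{ type, value, token }` record in both `NativeCompletionParams` and `JinjaFormattedChatResult`. Below is a minimal caller-side migration sketch in TypeScript; the `migrateTrigger` helper and the numeric encodings chosen for `type` and `token` are illustrative assumptions, not values defined by the package.

// Sketch: adapting a 1.4.6-style lazy-grammar trigger to the 1.6.0 shape.
// Hypothetical helper; the type/token encodings below are assumptions.

type OldGrammarTrigger = { at_start: boolean; word: string } // 1.4.6 shape
type NewGrammarTrigger = { type: number; value: string; token: number } // 1.6.0 shape

function migrateTrigger(old: OldGrammarTrigger): NewGrammarTrigger {
  return {
    // Assumed encoding: 1 = word anchored at start, 0 = word anywhere.
    type: old.at_start ? 1 : 0,
    value: old.word,
    // Assumed sentinel meaning "match by string, not by a specific token id".
    token: -1,
  }
}

const oldTriggers: OldGrammarTrigger[] = [{ at_start: true, word: '<tool_call>' }]
const triggers: NewGrammarTrigger[] = oldTriggers.map(migrateTrigger)
console.log(triggers) // [{ type: 1, value: '<tool_call>', token: -1 }]

Consumers that persisted 1.4.6-style trigger objects would need a mapping along these lines before passing completion params across the bridge; the actual `type` values should be taken from the matching llama.cpp trigger definitions rather than this sketch.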