cui-llama.rn 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (309)
  1. package/LICENSE +20 -20
  2. package/README.md +317 -319
  3. package/android/build.gradle +116 -116
  4. package/android/gradle.properties +5 -5
  5. package/android/src/main/AndroidManifest.xml +4 -4
  6. package/android/src/main/CMakeLists.txt +124 -124
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
  8. package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
  10. package/android/src/main/jni-utils.h +100 -100
  11. package/android/src/main/jni.cpp +1263 -1263
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  20. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
  21. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
  22. package/cpp/README.md +4 -4
  23. package/cpp/ggml-llama-sim.metallib +0 -0
  24. package/cpp/ggml-llama.metallib +0 -0
  25. package/cpp/ggml-metal-impl.h +597 -597
  26. package/cpp/ggml-metal.m +4 -0
  27. package/cpp/ggml.h +1 -1
  28. package/cpp/rn-llama.cpp +873 -873
  29. package/cpp/rn-llama.h +138 -138
  30. package/cpp/sampling.h +107 -107
  31. package/cpp/unicode-data.cpp +7034 -7034
  32. package/cpp/unicode-data.h +20 -20
  33. package/cpp/unicode.cpp +849 -849
  34. package/cpp/unicode.h +66 -66
  35. package/ios/CMakeLists.txt +116 -108
  36. package/ios/RNLlama.h +7 -7
  37. package/ios/RNLlama.mm +418 -405
  38. package/ios/RNLlamaContext.h +57 -57
  39. package/ios/RNLlamaContext.mm +835 -835
  40. package/ios/rnllama.xcframework/Info.plist +74 -74
  41. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  42. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
  43. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
  44. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  45. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  46. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  47. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  48. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  49. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  50. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  51. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  52. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  53. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  54. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  55. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  56. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  57. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  58. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  59. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  60. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  61. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  62. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
  63. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  64. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  65. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  66. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  67. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  68. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  69. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  70. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  71. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  72. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  73. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  74. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  75. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  76. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  77. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  78. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  79. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  80. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  81. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  82. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  83. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  84. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
  85. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
  86. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  87. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  88. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
  89. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  90. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
  91. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  92. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  93. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
  94. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  95. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  96. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
  97. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  101. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  102. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  103. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  104. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  105. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  106. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  107. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  108. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  109. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  110. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  111. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  112. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  113. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  114. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  115. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  116. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  117. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  118. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  119. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  120. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  121. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  122. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  123. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  124. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  125. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  126. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  127. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  128. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  129. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  130. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  132. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  133. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  134. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  162. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  163. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
  164. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
  165. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  166. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  167. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  168. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  169. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  170. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  171. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  172. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  173. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  174. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  175. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  176. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  177. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  178. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  179. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  180. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  181. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  182. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  183. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
  184. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  185. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  188. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  189. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  190. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
  218. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  222. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  223. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  224. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  259. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  260. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  271. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  274. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  275. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  276. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  277. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  278. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  279. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  280. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  281. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  282. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  283. package/jest/mock.js +203 -203
  284. package/lib/commonjs/NativeRNLlama.js +1 -2
  285. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  286. package/lib/commonjs/chat.js.map +1 -1
  287. package/lib/commonjs/grammar.js +12 -31
  288. package/lib/commonjs/grammar.js.map +1 -1
  289. package/lib/commonjs/index.js +47 -47
  290. package/lib/commonjs/index.js.map +1 -1
  291. package/lib/commonjs/package.json +1 -0
  292. package/lib/module/NativeRNLlama.js +2 -0
  293. package/lib/module/NativeRNLlama.js.map +1 -1
  294. package/lib/module/chat.js +2 -0
  295. package/lib/module/chat.js.map +1 -1
  296. package/lib/module/grammar.js +14 -31
  297. package/lib/module/grammar.js.map +1 -1
  298. package/lib/module/index.js +47 -45
  299. package/lib/module/index.js.map +1 -1
  300. package/lib/module/package.json +1 -0
  301. package/lib/typescript/NativeRNLlama.d.ts +6 -4
  302. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  303. package/lib/typescript/index.d.ts.map +1 -1
  304. package/llama-rn.podspec +48 -48
  305. package/package.json +233 -233
  306. package/src/NativeRNLlama.ts +426 -426
  307. package/src/chat.ts +44 -44
  308. package/src/grammar.ts +854 -854
  309. package/src/index.ts +495 -487
@@ -1,645 +1,645 @@
1
- package com.rnllama;
2
-
3
- import com.facebook.react.bridge.Arguments;
4
- import com.facebook.react.bridge.WritableArray;
5
- import com.facebook.react.bridge.WritableMap;
6
- import com.facebook.react.bridge.ReadableMap;
7
- import com.facebook.react.bridge.ReadableArray;
8
- import com.facebook.react.bridge.ReactApplicationContext;
9
- import com.facebook.react.modules.core.DeviceEventManagerModule;
10
-
11
- import android.util.Log;
12
- import android.os.Build;
13
- import android.os.ParcelFileDescriptor;
14
- import android.net.Uri;
15
- import android.content.Intent;
16
- import android.content.res.AssetManager;
17
-
18
- import java.lang.StringBuilder;
19
- import java.io.BufferedReader;
20
- import java.io.FileReader;
21
- import java.io.File;
22
- import java.io.IOException;
23
- import java.io.InputStream;
24
- import java.io.FileInputStream;
25
-
26
- public class LlamaContext {
27
- public static final String NAME = "RNLlamaContext";
28
-
29
- private static String loadedLibrary = "";
30
-
31
- private static class NativeLogCallback {
32
- DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
33
-
34
- public NativeLogCallback(ReactApplicationContext reactContext) {
35
- this.eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
36
- }
37
-
38
- void emitNativeLog(String level, String text) {
39
- WritableMap event = Arguments.createMap();
40
- event.putString("level", level);
41
- event.putString("text", text);
42
- eventEmitter.emit("@RNLlama_onNativeLog", event);
43
- }
44
- }
45
-
46
- static void toggleNativeLog(ReactApplicationContext reactContext, boolean enabled) {
47
- if (LlamaContext.isArchNotSupported()) {
48
- throw new IllegalStateException("Only 64-bit architectures are supported");
49
- }
50
- if (enabled) {
51
- setupLog(new NativeLogCallback(reactContext));
52
- } else {
53
- unsetLog();
54
- }
55
- }
56
-
57
- private int id;
58
- private ReactApplicationContext reactContext;
59
- private long context;
60
- private WritableMap modelDetails;
61
- private int jobId = -1;
62
- private DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
63
-
64
- private byte[] ggufHeader = {0x47, 0x47, 0x55, 0x46};
65
-
66
- private boolean isGGUF(final String filepath, final ReactApplicationContext reactContext) {
67
- byte[] fileHeader = new byte[4];
68
- InputStream fis = null;
69
- try {
70
- if (filepath.startsWith("content")) {
71
- Uri uri = Uri.parse(filepath);
72
- reactContext.getApplicationContext().getContentResolver().takePersistableUriPermission(uri, Intent.FLAG_GRANT_READ_URI_PERMISSION);
73
- fis = reactContext.getApplicationContext().getContentResolver().openInputStream(uri);
74
- } else {
75
- fis = new FileInputStream(filepath);
76
- }
77
-
78
- int bytesRead = fis.read(fileHeader);
79
- if(bytesRead < 4) {
80
- return false;
81
- }
82
- for(int i = 0; i < 4; i++){
83
- if(fileHeader[i] != ggufHeader[i])
84
- return false;
85
- }
86
- return true;
87
- } catch (Exception e) {
88
- Log.e(NAME, "Failed to check GGUF: " + e.getMessage());
89
- return false;
90
- }finally {
91
- if (fis != null) {
92
- try {
93
- fis.close();
94
- } catch (Exception e) {
95
- Log.d(NAME, "Closing InputStream failed.");
96
- }
97
- }
98
- }
99
- }
100
-
101
- public LlamaContext(int id, ReactApplicationContext reactContext, ReadableMap params) {
102
- if (LlamaContext.isArchNotSupported()) {
103
- throw new IllegalStateException("Only 64-bit architectures are supported");
104
- }
105
- if (!params.hasKey("model")) {
106
- throw new IllegalArgumentException("Missing required parameter: model");
107
- }
108
-
109
- String modelName = params.getString("model");
110
-
111
- if(!isGGUF(modelName, reactContext)) {
112
- throw new IllegalArgumentException("File is not in GGUF format");
113
- }
114
-
115
- if ( modelName.startsWith("content://")) {
116
- Uri uri = Uri.parse(modelName);
117
- try {
118
- ParcelFileDescriptor pfd = reactContext.getApplicationContext().getContentResolver().openFileDescriptor(uri, "r");
119
- modelName = "" + pfd.getFd();
120
- } catch (Exception e) {
121
- Log.e(NAME, "Failed to convert to FD!");
122
- }
123
- }
124
-
125
- // Check if file has GGUF magic numbers
126
- this.id = id;
127
- eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
128
- this.context = initContext(
129
- // String model,
130
- modelName,
131
- // String chat_template,
132
- params.hasKey("chat_template") ? params.getString("chat_template") : "",
133
- // String reasoning_format,
134
- params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
135
- // boolean embedding,
136
- params.hasKey("embedding") ? params.getBoolean("embedding") : false,
137
- // int embd_normalize,
138
- params.hasKey("embd_normalize") ? params.getInt("embd_normalize") : -1,
139
- // int n_ctx,
140
- params.hasKey("n_ctx") ? params.getInt("n_ctx") : 512,
141
- // int n_batch,
142
- params.hasKey("n_batch") ? params.getInt("n_batch") : 512,
143
- // int n_ubatch,
144
- params.hasKey("n_ubatch") ? params.getInt("n_ubatch") : 512,
145
- // int n_threads,
146
- params.hasKey("n_threads") ? params.getInt("n_threads") : 0,
147
- // int n_gpu_layers, // TODO: Support this
148
- params.hasKey("n_gpu_layers") ? params.getInt("n_gpu_layers") : 0,
149
- // boolean flash_attn,
150
- params.hasKey("flash_attn") ? params.getBoolean("flash_attn") : false,
151
- // String cache_type_k,
152
- params.hasKey("cache_type_k") ? params.getString("cache_type_k") : "f16",
153
- // String cache_type_v,
154
- params.hasKey("cache_type_v") ? params.getString("cache_type_v") : "f16",
155
- // boolean use_mlock,
156
- params.hasKey("use_mlock") ? params.getBoolean("use_mlock") : true,
157
- // boolean use_mmap,
158
- params.hasKey("use_mmap") ? params.getBoolean("use_mmap") : true,
159
- //boolean vocab_only,
160
- params.hasKey("vocab_only") ? params.getBoolean("vocab_only") : false,
161
- // String lora,
162
- params.hasKey("lora") ? params.getString("lora") : "",
163
- // float lora_scaled,
164
- params.hasKey("lora_scaled") ? (float) params.getDouble("lora_scaled") : 1.0f,
165
- // ReadableArray lora_adapters,
166
- params.hasKey("lora_list") ? params.getArray("lora_list") : null,
167
- // float rope_freq_base,
168
- params.hasKey("rope_freq_base") ? (float) params.getDouble("rope_freq_base") : 0.0f,
169
- // float rope_freq_scale
170
- params.hasKey("rope_freq_scale") ? (float) params.getDouble("rope_freq_scale") : 0.0f,
171
- // int pooling_type,
172
- params.hasKey("pooling_type") ? params.getInt("pooling_type") : -1,
173
- // LoadProgressCallback load_progress_callback
174
- params.hasKey("use_progress_callback") ? new LoadProgressCallback(this) : null
175
- );
176
- if (this.context == -1) {
177
- throw new IllegalStateException("Failed to initialize context");
178
- }
179
- this.modelDetails = loadModelDetails(this.context);
180
- this.reactContext = reactContext;
181
- }
182
-
183
- public void interruptLoad() {
184
- interruptLoad(this.context);
185
- }
186
-
187
- public long getContext() {
188
- return context;
189
- }
190
-
191
- public WritableMap getModelDetails() {
192
- return modelDetails;
193
- }
194
-
195
- public String getLoadedLibrary() {
196
- return loadedLibrary;
197
- }
198
-
199
- public WritableMap getFormattedChatWithJinja(String messages, String chatTemplate, ReadableMap params) {
200
- String jsonSchema = params.hasKey("json_schema") ? params.getString("json_schema") : "";
201
- String tools = params.hasKey("tools") ? params.getString("tools") : "";
202
- Boolean parallelToolCalls = params.hasKey("parallel_tool_calls") ? params.getBoolean("parallel_tool_calls") : false;
203
- String toolChoice = params.hasKey("tool_choice") ? params.getString("tool_choice") : "";
204
- return getFormattedChatWithJinja(
205
- this.context,
206
- messages,
207
- chatTemplate == null ? "" : chatTemplate,
208
- jsonSchema,
209
- tools,
210
- parallelToolCalls,
211
- toolChoice
212
- );
213
- }
214
-
215
- public String getFormattedChat(String messages, String chatTemplate) {
216
- return getFormattedChat(this.context, messages, chatTemplate == null ? "" : chatTemplate);
217
- }
218
-
219
- private void emitLoadProgress(int progress) {
220
- WritableMap event = Arguments.createMap();
221
- event.putInt("contextId", LlamaContext.this.id);
222
- event.putInt("progress", progress);
223
- eventEmitter.emit("@RNLlama_onInitContextProgress", event);
224
- }
225
-
226
- private static class LoadProgressCallback {
227
- LlamaContext context;
228
-
229
- public LoadProgressCallback(LlamaContext context) {
230
- this.context = context;
231
- }
232
-
233
- void onLoadProgress(int progress) {
234
- context.emitLoadProgress(progress);
235
- }
236
- }
237
-
238
- private void emitPartialCompletion(WritableMap tokenResult) {
239
- WritableMap event = Arguments.createMap();
240
- event.putInt("contextId", LlamaContext.this.id);
241
- event.putMap("tokenResult", tokenResult);
242
- eventEmitter.emit("@RNLlama_onToken", event);
243
- }
244
-
245
- private static class PartialCompletionCallback {
246
- LlamaContext context;
247
- boolean emitNeeded;
248
-
249
- public PartialCompletionCallback(LlamaContext context, boolean emitNeeded) {
250
- this.context = context;
251
- this.emitNeeded = emitNeeded;
252
- }
253
-
254
- void onPartialCompletion(WritableMap tokenResult) {
255
- if (!emitNeeded) return;
256
- context.emitPartialCompletion(tokenResult);
257
- }
258
- }
259
-
260
- public WritableMap loadSession(String path) {
261
- if (path == null || path.isEmpty()) {
262
- throw new IllegalArgumentException("File path is empty");
263
- }
264
- File file = new File(path);
265
- if (!file.exists()) {
266
- throw new IllegalArgumentException("File does not exist: " + path);
267
- }
268
- WritableMap result = loadSession(this.context, path);
269
- if (result.hasKey("error")) {
270
- throw new IllegalStateException(result.getString("error"));
271
- }
272
- return result;
273
- }
274
-
275
- public int saveSession(String path, int size) {
276
- if (path == null || path.isEmpty()) {
277
- throw new IllegalArgumentException("File path is empty");
278
- }
279
- return saveSession(this.context, path, size);
280
- }
281
-
282
- public WritableMap completion(ReadableMap params) {
283
- if (!params.hasKey("prompt")) {
284
- throw new IllegalArgumentException("Missing required parameter: prompt");
285
- }
286
-
287
- double[][] logit_bias = new double[0][0];
288
- if (params.hasKey("logit_bias")) {
289
- ReadableArray logit_bias_array = params.getArray("logit_bias");
290
- logit_bias = new double[logit_bias_array.size()][];
291
- for (int i = 0; i < logit_bias_array.size(); i++) {
292
- ReadableArray logit_bias_row = logit_bias_array.getArray(i);
293
- logit_bias[i] = new double[logit_bias_row.size()];
294
- for (int j = 0; j < logit_bias_row.size(); j++) {
295
- logit_bias[i][j] = logit_bias_row.getDouble(j);
296
- }
297
- }
298
- }
299
-
300
- WritableMap result = doCompletion(
301
- this.context,
302
- // String prompt,
303
- params.getString("prompt"),
304
- // int chat_format,
305
- params.hasKey("chat_format") ? params.getInt("chat_format") : 0,
306
- // String grammar,
307
- params.hasKey("grammar") ? params.getString("grammar") : "",
308
- // String json_schema,
309
- params.hasKey("json_schema") ? params.getString("json_schema") : "",
310
- // boolean grammar_lazy,
311
- params.hasKey("grammar_lazy") ? params.getBoolean("grammar_lazy") : false,
312
- // ReadableArray grammar_triggers,
313
- params.hasKey("grammar_triggers") ? params.getArray("grammar_triggers") : null,
314
- // ReadableArray preserved_tokens,
315
- params.hasKey("preserved_tokens") ? params.getArray("preserved_tokens") : null,
316
- // float temperature,
317
- params.hasKey("temperature") ? (float) params.getDouble("temperature") : 0.7f,
318
- // int n_threads,
319
- params.hasKey("n_threads") ? params.getInt("n_threads") : 0,
320
- // int n_predict,
321
- params.hasKey("n_predict") ? params.getInt("n_predict") : -1,
322
- // int n_probs,
323
- params.hasKey("n_probs") ? params.getInt("n_probs") : 0,
324
- // int penalty_last_n,
325
- params.hasKey("penalty_last_n") ? params.getInt("penalty_last_n") : 64,
326
- // float penalty_repeat,
327
- params.hasKey("penalty_repeat") ? (float) params.getDouble("penalty_repeat") : 1.00f,
328
- // float penalty_freq,
329
- params.hasKey("penalty_freq") ? (float) params.getDouble("penalty_freq") : 0.00f,
330
- // float penalty_present,
331
- params.hasKey("penalty_present") ? (float) params.getDouble("penalty_present") : 0.00f,
332
- // float mirostat,
333
- params.hasKey("mirostat") ? (float) params.getDouble("mirostat") : 0.00f,
334
- // float mirostat_tau,
335
- params.hasKey("mirostat_tau") ? (float) params.getDouble("mirostat_tau") : 5.00f,
336
- // float mirostat_eta,
337
- params.hasKey("mirostat_eta") ? (float) params.getDouble("mirostat_eta") : 0.10f,
338
- // int top_k,
339
- params.hasKey("top_k") ? params.getInt("top_k") : 40,
340
- // float top_p,
341
- params.hasKey("top_p") ? (float) params.getDouble("top_p") : 0.95f,
342
- // float min_p,
343
- params.hasKey("min_p") ? (float) params.getDouble("min_p") : 0.05f,
344
- // float xtc_threshold,
345
- params.hasKey("xtc_threshold") ? (float) params.getDouble("xtc_threshold") : 0.00f,
346
- // float xtc_probability,
347
- params.hasKey("xtc_probability") ? (float) params.getDouble("xtc_probability") : 0.00f,
348
- // float typical_p,
349
- params.hasKey("typical_p") ? (float) params.getDouble("typical_p") : 1.00f,
350
- // int seed,
351
- params.hasKey("seed") ? params.getInt("seed") : -1,
352
- // String[] stop,
353
- params.hasKey("stop") ? params.getArray("stop").toArrayList().toArray(new String[0]) : new String[0],
354
- // boolean ignore_eos,
355
- params.hasKey("ignore_eos") ? params.getBoolean("ignore_eos") : false,
356
- // double[][] logit_bias,
357
- logit_bias,
358
- // float dry_multiplier,
359
- params.hasKey("dry_multiplier") ? (float) params.getDouble("dry_multiplier") : 0.00f,
360
- // float dry_base,
361
- params.hasKey("dry_base") ? (float) params.getDouble("dry_base") : 1.75f,
362
- // int dry_allowed_length,
363
- params.hasKey("dry_allowed_length") ? params.getInt("dry_allowed_length") : 2,
364
- // int dry_penalty_last_n,
365
- params.hasKey("dry_penalty_last_n") ? params.getInt("dry_penalty_last_n") : -1,
366
- // float top_n_sigma,
367
- params.hasKey("top_n_sigma") ? (float) params.getDouble("top_n_sigma") : -1.0f,
368
- // String[] dry_sequence_breakers, when undef, we use the default definition from common.h
369
- params.hasKey("dry_sequence_breakers") ? params.getArray("dry_sequence_breakers").toArrayList().toArray(new String[0]) : new String[]{"\n", ":", "\"", "*"},
370
- // PartialCompletionCallback partial_completion_callback
371
- new PartialCompletionCallback(
372
- this,
373
- params.hasKey("emit_partial_completion") ? params.getBoolean("emit_partial_completion") : false
374
- )
375
- );
376
- if (result.hasKey("error")) {
377
- throw new IllegalStateException(result.getString("error"));
378
- }
379
- return result;
380
- }
381
-
382
- public void stopCompletion() {
383
- stopCompletion(this.context);
384
- }
385
-
386
- public boolean isPredicting() {
387
- return isPredicting(this.context);
388
- }
389
-
390
- public WritableMap tokenize(String text) {
391
- WritableMap result = Arguments.createMap();
392
- result.putArray("tokens", tokenize(this.context, text));
393
- return result;
394
- }
395
-
396
- public String detokenize(ReadableArray tokens) {
397
- int[] toks = new int[tokens.size()];
398
- for (int i = 0; i < tokens.size(); i++) {
399
- toks[i] = (int) tokens.getDouble(i);
400
- }
401
- return detokenize(this.context, toks);
402
- }
403
-
404
- public WritableMap getEmbedding(String text, ReadableMap params) {
405
- if (isEmbeddingEnabled(this.context) == false) {
406
- throw new IllegalStateException("Embedding is not enabled");
407
- }
408
- WritableMap result = embedding(
409
- this.context,
410
- text,
411
- // int embd_normalize,
412
- params.hasKey("embd_normalize") ? params.getInt("embd_normalize") : -1
413
- );
414
- if (result.hasKey("error")) {
415
- throw new IllegalStateException(result.getString("error"));
416
- }
417
- return result;
418
- }
419
-
420
- public String bench(int pp, int tg, int pl, int nr) {
421
- return bench(this.context, pp, tg, pl, nr);
422
- }
423
-
424
- public int applyLoraAdapters(ReadableArray loraAdapters) {
425
- int result = applyLoraAdapters(this.context, loraAdapters);
426
- if (result != 0) {
427
- throw new IllegalStateException("Failed to apply lora adapters");
428
- }
429
- return result;
430
- }
431
-
432
- public void removeLoraAdapters() {
433
- removeLoraAdapters(this.context);
434
- }
435
-
436
- public WritableArray getLoadedLoraAdapters() {
437
- return getLoadedLoraAdapters(this.context);
438
- }
439
-
440
- public void release() {
441
- freeContext(context);
442
- }
443
-
444
- static {
445
- Log.d(NAME, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);
446
-
447
- String cpuFeatures = LlamaContext.getCpuFeatures();
448
- Log.d(NAME, "CPU features: " + cpuFeatures);
449
- boolean hasFp16 = cpuFeatures.contains("fp16") || cpuFeatures.contains("fphp");
450
- boolean hasDotProd = cpuFeatures.contains("dotprod") || cpuFeatures.contains("asimddp");
451
- boolean hasSve = cpuFeatures.contains("sve");
452
- boolean hasI8mm = cpuFeatures.contains("i8mm");
453
- boolean isAtLeastArmV82 = cpuFeatures.contains("asimd") && cpuFeatures.contains("crc32") && cpuFeatures.contains("aes");
454
- boolean isAtLeastArmV84 = cpuFeatures.contains("dcpop") && cpuFeatures.contains("uscat");
455
- Log.d(NAME, "- hasFp16: " + hasFp16);
456
- Log.d(NAME, "- hasDotProd: " + hasDotProd);
457
- Log.d(NAME, "- hasSve: " + hasSve);
458
- Log.d(NAME, "- hasI8mm: " + hasI8mm);
459
- Log.d(NAME, "- isAtLeastArmV82: " + isAtLeastArmV82);
460
- Log.d(NAME, "- isAtLeastArmV84: " + isAtLeastArmV84);
461
-
462
- // TODO: Add runtime check for cpu features
463
- if (LlamaContext.isArm64V8a()) {
464
- if (hasDotProd && hasI8mm) {
465
- Log.d(NAME, "Loading librnllama_v8_2_dotprod_i8mm.so");
466
- System.loadLibrary("rnllama_v8_2_dotprod_i8mm");
467
- loadedLibrary = "rnllama_v8_2_dotprod_i8mm";
468
- } else if (hasDotProd) {
469
- Log.d(NAME, "Loading librnllama_v8_2_dotprod.so");
470
- System.loadLibrary("rnllama_v8_2_dotprod");
471
- loadedLibrary = "rnllama_v8_2_dotprod";
472
- } else if (hasI8mm) {
473
- Log.d(NAME, "Loading librnllama_v8_2_i8mm.so");
474
- System.loadLibrary("rnllama_v8_2_i8mm");
475
- loadedLibrary = "rnllama_v8_2_i8mm";
476
- } else if (hasFp16) {
477
- Log.d(NAME, "Loading librnllama_v8_2.so");
478
- System.loadLibrary("rnllama_v8_2");
479
- loadedLibrary = "rnllama_v8_2";
480
- } else {
481
- Log.d(NAME, "Loading default librnllama_v8.so");
482
- System.loadLibrary("rnllama_v8");
483
- loadedLibrary = "rnllama_v8";
484
- }
485
- // Log.d(NAME, "Loading librnllama_v8_7.so with runtime feature detection");
486
- // System.loadLibrary("rnllama_v8_7");
487
- } else if (LlamaContext.isX86_64()) {
488
- Log.d(NAME, "Loading librnllama_x86_64.so");
489
- System.loadLibrary("rnllama_x86_64");
490
- loadedLibrary = "rnllama_x86_64";
491
- } else {
492
- Log.d(NAME, "ARM32 is not supported, skipping loading library");
493
- }
494
- }
495
-
496
- public static boolean isArm64V8a() {
497
- return Build.SUPPORTED_ABIS[0].equals("arm64-v8a");
498
- }
499
-
500
- private static boolean isX86_64() {
501
- return Build.SUPPORTED_ABIS[0].equals("x86_64");
502
- }
503
-
504
- private static boolean isArchNotSupported() {
505
- return isArm64V8a() == false && isX86_64() == false;
506
- }
507
-
508
- public static String getCpuFeatures() {
509
- File file = new File("/proc/cpuinfo");
510
- StringBuilder stringBuilder = new StringBuilder();
511
- try {
512
- BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
513
- String line;
514
- while ((line = bufferedReader.readLine()) != null) {
515
- if (line.startsWith("Features")) {
516
- stringBuilder.append(line);
517
- break;
518
- }
519
- }
520
- bufferedReader.close();
521
- return stringBuilder.toString();
522
- } catch (IOException e) {
523
- Log.w(NAME, "Couldn't read /proc/cpuinfo", e);
524
- return "";
525
- }
526
- }
527
-
528
- public void emitModelProgressUpdate(int progress) {
529
- WritableMap event = Arguments.createMap();
530
- event.putInt("progress", progress);
531
- eventEmitter.emit("@RNLlama_onInitContextProgress", event);
532
- }
533
-
534
- protected static native WritableMap modelInfo(
535
- String model,
536
- String[] skip
537
- );
538
- protected static native long initContext(
539
- String model,
540
- String chat_template,
541
- String reasoning_format,
542
- boolean embedding,
543
- int embd_normalize,
544
- int n_ctx,
545
- int n_batch,
546
- int n_ubatch,
547
- int n_threads,
548
- int n_gpu_layers, // TODO: Support this
549
- boolean flash_attn,
550
- String cache_type_k,
551
- String cache_type_v,
552
- boolean use_mlock,
553
- boolean use_mmap,
554
- boolean vocab_only,
555
- String lora,
556
- float lora_scaled,
557
- ReadableArray lora_list,
558
- float rope_freq_base,
559
- float rope_freq_scale,
560
- int pooling_type,
561
- LoadProgressCallback load_progress_callback
562
- );
563
- protected static native void interruptLoad(long contextPtr);
564
- protected static native WritableMap loadModelDetails(
565
- long contextPtr
566
- );
567
- protected static native WritableMap getFormattedChatWithJinja(
568
- long contextPtr,
569
- String messages,
570
- String chatTemplate,
571
- String jsonSchema,
572
- String tools,
573
- boolean parallelToolCalls,
574
- String toolChoice
575
- );
576
- protected static native String getFormattedChat(
577
- long contextPtr,
578
- String messages,
579
- String chatTemplate
580
- );
581
- protected static native WritableMap loadSession(
582
- long contextPtr,
583
- String path
584
- );
585
- protected static native int saveSession(
586
- long contextPtr,
587
- String path,
588
- int size
589
- );
590
- protected static native WritableMap doCompletion(
591
- long context_ptr,
592
- String prompt,
593
- int chat_format,
594
- String grammar,
595
- String json_schema,
596
- boolean grammar_lazy,
597
- ReadableArray grammar_triggers,
598
- ReadableArray preserved_tokens,
599
- float temperature,
600
- int n_threads,
601
- int n_predict,
602
- int n_probs,
603
- int penalty_last_n,
604
- float penalty_repeat,
605
- float penalty_freq,
606
- float penalty_present,
607
- float mirostat,
608
- float mirostat_tau,
609
- float mirostat_eta,
610
- int top_k,
611
- float top_p,
612
- float min_p,
613
- float xtc_threshold,
614
- float xtc_probability,
615
- float typical_p,
616
- int seed,
617
- String[] stop,
618
- boolean ignore_eos,
619
- double[][] logit_bias,
620
- float dry_multiplier,
621
- float dry_base,
622
- int dry_allowed_length,
623
- int dry_penalty_last_n,
624
- float top_n_sigma,
625
- String[] dry_sequence_breakers,
626
- PartialCompletionCallback partial_completion_callback
627
- );
628
- protected static native void stopCompletion(long contextPtr);
629
- protected static native boolean isPredicting(long contextPtr);
630
- protected static native WritableArray tokenize(long contextPtr, String text);
631
- protected static native String detokenize(long contextPtr, int[] tokens);
632
- protected static native boolean isEmbeddingEnabled(long contextPtr);
633
- protected static native WritableMap embedding(
634
- long contextPtr,
635
- String text,
636
- int embd_normalize
637
- );
638
- protected static native String bench(long contextPtr, int pp, int tg, int pl, int nr);
639
- protected static native int applyLoraAdapters(long contextPtr, ReadableArray loraAdapters);
640
- protected static native void removeLoraAdapters(long contextPtr);
641
- protected static native WritableArray getLoadedLoraAdapters(long contextPtr);
642
- protected static native void freeContext(long contextPtr);
643
- protected static native void setupLog(NativeLogCallback logCallback);
644
- protected static native void unsetLog();
645
- }
1
+ package com.rnllama;
2
+
3
+ import com.facebook.react.bridge.Arguments;
4
+ import com.facebook.react.bridge.WritableArray;
5
+ import com.facebook.react.bridge.WritableMap;
6
+ import com.facebook.react.bridge.ReadableMap;
7
+ import com.facebook.react.bridge.ReadableArray;
8
+ import com.facebook.react.bridge.ReactApplicationContext;
9
+ import com.facebook.react.modules.core.DeviceEventManagerModule;
10
+
11
+ import android.util.Log;
12
+ import android.os.Build;
13
+ import android.os.ParcelFileDescriptor;
14
+ import android.net.Uri;
15
+ import android.content.Intent;
16
+ import android.content.res.AssetManager;
17
+
18
+ import java.lang.StringBuilder;
19
+ import java.io.BufferedReader;
20
+ import java.io.FileReader;
21
+ import java.io.File;
22
+ import java.io.IOException;
23
+ import java.io.InputStream;
24
+ import java.io.FileInputStream;
25
+
26
+ public class LlamaContext {
27
+ public static final String NAME = "RNLlamaContext";
28
+
29
+ private static String loadedLibrary = "";
30
+
31
+ private static class NativeLogCallback {
32
+ DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
33
+
34
+ public NativeLogCallback(ReactApplicationContext reactContext) {
35
+ this.eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
36
+ }
37
+
38
+ void emitNativeLog(String level, String text) {
39
+ WritableMap event = Arguments.createMap();
40
+ event.putString("level", level);
41
+ event.putString("text", text);
42
+ eventEmitter.emit("@RNLlama_onNativeLog", event);
43
+ }
44
+ }
45
+
46
+ static void toggleNativeLog(ReactApplicationContext reactContext, boolean enabled) {
47
+ if (LlamaContext.isArchNotSupported()) {
48
+ throw new IllegalStateException("Only 64-bit architectures are supported");
49
+ }
50
+ if (enabled) {
51
+ setupLog(new NativeLogCallback(reactContext));
52
+ } else {
53
+ unsetLog();
54
+ }
55
+ }
56
+
57
+ private int id;
58
+ private ReactApplicationContext reactContext;
59
+ private long context;
60
+ private WritableMap modelDetails;
61
+ private int jobId = -1;
62
+ private DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
63
+
64
+ private byte[] ggufHeader = {0x47, 0x47, 0x55, 0x46};
65
+
66
+ private boolean isGGUF(final String filepath, final ReactApplicationContext reactContext) {
67
+ byte[] fileHeader = new byte[4];
68
+ InputStream fis = null;
69
+ try {
70
+ if (filepath.startsWith("content")) {
71
+ Uri uri = Uri.parse(filepath);
72
+ reactContext.getApplicationContext().getContentResolver().takePersistableUriPermission(uri, Intent.FLAG_GRANT_READ_URI_PERMISSION);
73
+ fis = reactContext.getApplicationContext().getContentResolver().openInputStream(uri);
74
+ } else {
75
+ fis = new FileInputStream(filepath);
76
+ }
77
+
78
+ int bytesRead = fis.read(fileHeader);
79
+ if(bytesRead < 4) {
80
+ return false;
81
+ }
82
+ for(int i = 0; i < 4; i++){
83
+ if(fileHeader[i] != ggufHeader[i])
84
+ return false;
85
+ }
86
+ return true;
87
+ } catch (Exception e) {
88
+ Log.e(NAME, "Failed to check GGUF: " + e.getMessage());
89
+ return false;
90
+ }finally {
91
+ if (fis != null) {
92
+ try {
93
+ fis.close();
94
+ } catch (Exception e) {
95
+ Log.d(NAME, "Closing InputStream failed.");
96
+ }
97
+ }
98
+ }
99
+ }
100
+
101
+ public LlamaContext(int id, ReactApplicationContext reactContext, ReadableMap params) {
102
+ if (LlamaContext.isArchNotSupported()) {
103
+ throw new IllegalStateException("Only 64-bit architectures are supported");
104
+ }
105
+ if (!params.hasKey("model")) {
106
+ throw new IllegalArgumentException("Missing required parameter: model");
107
+ }
108
+
109
+ String modelName = params.getString("model");
110
+
111
+ if(!isGGUF(modelName, reactContext)) {
112
+ throw new IllegalArgumentException("File is not in GGUF format");
113
+ }
114
+
115
+ if ( modelName.startsWith("content://")) {
116
+ Uri uri = Uri.parse(modelName);
117
+ try {
118
+ ParcelFileDescriptor pfd = reactContext.getApplicationContext().getContentResolver().openFileDescriptor(uri, "r");
119
+ modelName = "" + pfd.getFd();
120
+ } catch (Exception e) {
121
+ Log.e(NAME, "Failed to convert to FD!");
122
+ }
123
+ }
124
+
125
+ // Check if file has GGUF magic numbers
126
+ this.id = id;
127
+ eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
128
+ this.context = initContext(
129
+ // String model,
130
+ modelName,
131
+ // String chat_template,
132
+ params.hasKey("chat_template") ? params.getString("chat_template") : "",
133
+ // String reasoning_format,
134
+ params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
135
+ // boolean embedding,
136
+ params.hasKey("embedding") ? params.getBoolean("embedding") : false,
137
+ // int embd_normalize,
138
+ params.hasKey("embd_normalize") ? params.getInt("embd_normalize") : -1,
139
+ // int n_ctx,
140
+ params.hasKey("n_ctx") ? params.getInt("n_ctx") : 512,
141
+ // int n_batch,
142
+ params.hasKey("n_batch") ? params.getInt("n_batch") : 512,
143
+ // int n_ubatch,
144
+ params.hasKey("n_ubatch") ? params.getInt("n_ubatch") : 512,
145
+ // int n_threads,
146
+ params.hasKey("n_threads") ? params.getInt("n_threads") : 0,
147
+ // int n_gpu_layers, // TODO: Support this
148
+ params.hasKey("n_gpu_layers") ? params.getInt("n_gpu_layers") : 0,
149
+ // boolean flash_attn,
150
+ params.hasKey("flash_attn") ? params.getBoolean("flash_attn") : false,
151
+ // String cache_type_k,
152
+ params.hasKey("cache_type_k") ? params.getString("cache_type_k") : "f16",
153
+ // String cache_type_v,
154
+ params.hasKey("cache_type_v") ? params.getString("cache_type_v") : "f16",
155
+ // boolean use_mlock,
156
+ params.hasKey("use_mlock") ? params.getBoolean("use_mlock") : true,
157
+ // boolean use_mmap,
158
+ params.hasKey("use_mmap") ? params.getBoolean("use_mmap") : true,
159
+ //boolean vocab_only,
160
+ params.hasKey("vocab_only") ? params.getBoolean("vocab_only") : false,
161
+ // String lora,
162
+ params.hasKey("lora") ? params.getString("lora") : "",
163
+ // float lora_scaled,
164
+ params.hasKey("lora_scaled") ? (float) params.getDouble("lora_scaled") : 1.0f,
165
+ // ReadableArray lora_adapters,
166
+ params.hasKey("lora_list") ? params.getArray("lora_list") : null,
167
+ // float rope_freq_base,
168
+ params.hasKey("rope_freq_base") ? (float) params.getDouble("rope_freq_base") : 0.0f,
169
+ // float rope_freq_scale
170
+ params.hasKey("rope_freq_scale") ? (float) params.getDouble("rope_freq_scale") : 0.0f,
171
+ // int pooling_type,
172
+ params.hasKey("pooling_type") ? params.getInt("pooling_type") : -1,
173
+ // LoadProgressCallback load_progress_callback
174
+ params.hasKey("use_progress_callback") ? new LoadProgressCallback(this) : null
175
+ );
176
+ if (this.context == -1) {
177
+ throw new IllegalStateException("Failed to initialize context");
178
+ }
179
+ this.modelDetails = loadModelDetails(this.context);
180
+ this.reactContext = reactContext;
181
+ }
182
+
183
+ public void interruptLoad() {
184
+ interruptLoad(this.context);
185
+ }
186
+
187
+ public long getContext() {
188
+ return context;
189
+ }
190
+
191
+ public WritableMap getModelDetails() {
192
+ return modelDetails;
193
+ }
194
+
195
+ public String getLoadedLibrary() {
196
+ return loadedLibrary;
197
+ }
198
+
199
+ public WritableMap getFormattedChatWithJinja(String messages, String chatTemplate, ReadableMap params) {
200
+ String jsonSchema = params.hasKey("json_schema") ? params.getString("json_schema") : "";
201
+ String tools = params.hasKey("tools") ? params.getString("tools") : "";
202
+ Boolean parallelToolCalls = params.hasKey("parallel_tool_calls") ? params.getBoolean("parallel_tool_calls") : false;
203
+ String toolChoice = params.hasKey("tool_choice") ? params.getString("tool_choice") : "";
204
+ return getFormattedChatWithJinja(
205
+ this.context,
206
+ messages,
207
+ chatTemplate == null ? "" : chatTemplate,
208
+ jsonSchema,
209
+ tools,
210
+ parallelToolCalls,
211
+ toolChoice
212
+ );
213
+ }
214
+
215
+ public String getFormattedChat(String messages, String chatTemplate) {
216
+ return getFormattedChat(this.context, messages, chatTemplate == null ? "" : chatTemplate);
217
+ }
218
+
219
+ private void emitLoadProgress(int progress) {
220
+ WritableMap event = Arguments.createMap();
221
+ event.putInt("contextId", LlamaContext.this.id);
222
+ event.putInt("progress", progress);
223
+ eventEmitter.emit("@RNLlama_onInitContextProgress", event);
224
+ }
225
+
226
  // Bridges native load-progress notifications back onto the owning context.
  // NOTE(review): this object is handed to the native initContext call, and
  // onLoadProgress appears to be invoked from JNI — confirm in jni.cpp before
  // renaming the class, the field, or the method.
  private static class LoadProgressCallback {
    // Context whose emitLoadProgress() forwards progress to JS.
    LlamaContext context;

    public LoadProgressCallback(LlamaContext context) {
      this.context = context;
    }

    void onLoadProgress(int progress) {
      context.emitLoadProgress(progress);
    }
  }
237
+
238
+ private void emitPartialCompletion(WritableMap tokenResult) {
239
+ WritableMap event = Arguments.createMap();
240
+ event.putInt("contextId", LlamaContext.this.id);
241
+ event.putMap("tokenResult", tokenResult);
242
+ eventEmitter.emit("@RNLlama_onToken", event);
243
+ }
244
+
245
  // Bridges native token-streaming notifications back onto the owning context.
  // NOTE(review): instances are passed to the native doCompletion call, and
  // onPartialCompletion appears to be invoked from JNI — confirm in jni.cpp
  // before renaming the class, the fields, or the method.
  private static class PartialCompletionCallback {
    LlamaContext context;
    // When false, streamed tokens are dropped instead of forwarded to JS.
    boolean emitNeeded;

    public PartialCompletionCallback(LlamaContext context, boolean emitNeeded) {
      this.context = context;
      this.emitNeeded = emitNeeded;
    }

    void onPartialCompletion(WritableMap tokenResult) {
      if (!emitNeeded) return;
      context.emitPartialCompletion(tokenResult);
    }
  }
259
+
260
+ public WritableMap loadSession(String path) {
261
+ if (path == null || path.isEmpty()) {
262
+ throw new IllegalArgumentException("File path is empty");
263
+ }
264
+ File file = new File(path);
265
+ if (!file.exists()) {
266
+ throw new IllegalArgumentException("File does not exist: " + path);
267
+ }
268
+ WritableMap result = loadSession(this.context, path);
269
+ if (result.hasKey("error")) {
270
+ throw new IllegalStateException(result.getString("error"));
271
+ }
272
+ return result;
273
+ }
274
+
275
+ public int saveSession(String path, int size) {
276
+ if (path == null || path.isEmpty()) {
277
+ throw new IllegalArgumentException("File path is empty");
278
+ }
279
+ return saveSession(this.context, path, size);
280
+ }
281
+
282
  /**
   * Runs a text completion on the native context with the sampling parameters
   * supplied in {@code params}. Defaults below are applied for any absent key.
   *
   * @param params completion options; "prompt" is required, everything else optional
   * @return the native result map
   * @throws IllegalArgumentException when "prompt" is missing
   * @throws IllegalStateException    when the native call reports an error
   */
  public WritableMap completion(ReadableMap params) {
    if (!params.hasKey("prompt")) {
      throw new IllegalArgumentException("Missing required parameter: prompt");
    }

    // Flatten the JS nested array of [token, bias] rows into a primitive
    // double[][] for JNI. An absent key yields an empty 0x0 matrix.
    double[][] logit_bias = new double[0][0];
    if (params.hasKey("logit_bias")) {
      ReadableArray logit_bias_array = params.getArray("logit_bias");
      logit_bias = new double[logit_bias_array.size()][];
      for (int i = 0; i < logit_bias_array.size(); i++) {
        ReadableArray logit_bias_row = logit_bias_array.getArray(i);
        logit_bias[i] = new double[logit_bias_row.size()];
        for (int j = 0; j < logit_bias_row.size(); j++) {
          logit_bias[i][j] = logit_bias_row.getDouble(j);
        }
      }
    }

    // NOTE: arguments are positional and must match the doCompletion native
    // declaration exactly — keep the order in sync when editing either side.
    WritableMap result = doCompletion(
      this.context,
      // String prompt,
      params.getString("prompt"),
      // int chat_format,
      params.hasKey("chat_format") ? params.getInt("chat_format") : 0,
      // String grammar,
      params.hasKey("grammar") ? params.getString("grammar") : "",
      // String json_schema,
      params.hasKey("json_schema") ? params.getString("json_schema") : "",
      // boolean grammar_lazy,
      params.hasKey("grammar_lazy") ? params.getBoolean("grammar_lazy") : false,
      // ReadableArray grammar_triggers,
      params.hasKey("grammar_triggers") ? params.getArray("grammar_triggers") : null,
      // ReadableArray preserved_tokens,
      params.hasKey("preserved_tokens") ? params.getArray("preserved_tokens") : null,
      // float temperature,
      params.hasKey("temperature") ? (float) params.getDouble("temperature") : 0.7f,
      // int n_threads,
      params.hasKey("n_threads") ? params.getInt("n_threads") : 0,
      // int n_predict,
      params.hasKey("n_predict") ? params.getInt("n_predict") : -1,
      // int n_probs,
      params.hasKey("n_probs") ? params.getInt("n_probs") : 0,
      // int penalty_last_n,
      params.hasKey("penalty_last_n") ? params.getInt("penalty_last_n") : 64,
      // float penalty_repeat,
      params.hasKey("penalty_repeat") ? (float) params.getDouble("penalty_repeat") : 1.00f,
      // float penalty_freq,
      params.hasKey("penalty_freq") ? (float) params.getDouble("penalty_freq") : 0.00f,
      // float penalty_present,
      params.hasKey("penalty_present") ? (float) params.getDouble("penalty_present") : 0.00f,
      // float mirostat,
      params.hasKey("mirostat") ? (float) params.getDouble("mirostat") : 0.00f,
      // float mirostat_tau,
      params.hasKey("mirostat_tau") ? (float) params.getDouble("mirostat_tau") : 5.00f,
      // float mirostat_eta,
      params.hasKey("mirostat_eta") ? (float) params.getDouble("mirostat_eta") : 0.10f,
      // int top_k,
      params.hasKey("top_k") ? params.getInt("top_k") : 40,
      // float top_p,
      params.hasKey("top_p") ? (float) params.getDouble("top_p") : 0.95f,
      // float min_p,
      params.hasKey("min_p") ? (float) params.getDouble("min_p") : 0.05f,
      // float xtc_threshold,
      params.hasKey("xtc_threshold") ? (float) params.getDouble("xtc_threshold") : 0.00f,
      // float xtc_probability,
      params.hasKey("xtc_probability") ? (float) params.getDouble("xtc_probability") : 0.00f,
      // float typical_p,
      params.hasKey("typical_p") ? (float) params.getDouble("typical_p") : 1.00f,
      // int seed, (-1 presumably means "random seed" — verify against rn-llama.cpp)
      params.hasKey("seed") ? params.getInt("seed") : -1,
      // String[] stop,
      params.hasKey("stop") ? params.getArray("stop").toArrayList().toArray(new String[0]) : new String[0],
      // boolean ignore_eos,
      params.hasKey("ignore_eos") ? params.getBoolean("ignore_eos") : false,
      // double[][] logit_bias,
      logit_bias,
      // float dry_multiplier,
      params.hasKey("dry_multiplier") ? (float) params.getDouble("dry_multiplier") : 0.00f,
      // float dry_base,
      params.hasKey("dry_base") ? (float) params.getDouble("dry_base") : 1.75f,
      // int dry_allowed_length,
      params.hasKey("dry_allowed_length") ? params.getInt("dry_allowed_length") : 2,
      // int dry_penalty_last_n,
      params.hasKey("dry_penalty_last_n") ? params.getInt("dry_penalty_last_n") : -1,
      // float top_n_sigma,
      params.hasKey("top_n_sigma") ? (float) params.getDouble("top_n_sigma") : -1.0f,
      // String[] dry_sequence_breakers, when undef, we use the default definition from common.h
      params.hasKey("dry_sequence_breakers") ? params.getArray("dry_sequence_breakers").toArrayList().toArray(new String[0]) : new String[]{"\n", ":", "\"", "*"},
      // PartialCompletionCallback partial_completion_callback
      new PartialCompletionCallback(
        this,
        params.hasKey("emit_partial_completion") ? params.getBoolean("emit_partial_completion") : false
      )
    );
    if (result.hasKey("error")) {
      throw new IllegalStateException(result.getString("error"));
    }
    return result;
  }
381
+
382
  /** Requests cancellation of the currently running native completion, if any. */
  public void stopCompletion() {
    stopCompletion(this.context);
  }
385
+
386
  /** @return true while the native side reports a completion in progress. */
  public boolean isPredicting() {
    return isPredicting(this.context);
  }
389
+
390
+ public WritableMap tokenize(String text) {
391
+ WritableMap result = Arguments.createMap();
392
+ result.putArray("tokens", tokenize(this.context, text));
393
+ return result;
394
+ }
395
+
396
+ public String detokenize(ReadableArray tokens) {
397
+ int[] toks = new int[tokens.size()];
398
+ for (int i = 0; i < tokens.size(); i++) {
399
+ toks[i] = (int) tokens.getDouble(i);
400
+ }
401
+ return detokenize(this.context, toks);
402
+ }
403
+
404
+ public WritableMap getEmbedding(String text, ReadableMap params) {
405
+ if (isEmbeddingEnabled(this.context) == false) {
406
+ throw new IllegalStateException("Embedding is not enabled");
407
+ }
408
+ WritableMap result = embedding(
409
+ this.context,
410
+ text,
411
+ // int embd_normalize,
412
+ params.hasKey("embd_normalize") ? params.getInt("embd_normalize") : -1
413
+ );
414
+ if (result.hasKey("error")) {
415
+ throw new IllegalStateException(result.getString("error"));
416
+ }
417
+ return result;
418
+ }
419
+
420
  /** Runs the native benchmark; pp/tg/pl/nr are forwarded verbatim to the native bench implementation. */
  public String bench(int pp, int tg, int pl, int nr) {
    return bench(this.context, pp, tg, pl, nr);
  }
423
+
424
+ public int applyLoraAdapters(ReadableArray loraAdapters) {
425
+ int result = applyLoraAdapters(this.context, loraAdapters);
426
+ if (result != 0) {
427
+ throw new IllegalStateException("Failed to apply lora adapters");
428
+ }
429
+ return result;
430
+ }
431
+
432
  /** Detaches all LoRA adapters currently applied to this context. */
  public void removeLoraAdapters() {
    removeLoraAdapters(this.context);
  }
435
+
436
  /** @return descriptors of the LoRA adapters currently loaded in the native context. */
  public WritableArray getLoadedLoraAdapters() {
    return getLoadedLoraAdapters(this.context);
  }
439
+
440
  /** Frees the native context; the handle should be considered invalid after this call. */
  public void release() {
    freeContext(context);
  }
443
+
444
  // Class initializer: probes the device ABI and the /proc/cpuinfo feature
  // flags, then loads the most capable librnllama build available. Runs once
  // per process, before any context can be constructed.
  static {
    Log.d(NAME, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);

    String cpuFeatures = LlamaContext.getCpuFeatures();
    Log.d(NAME, "CPU features: " + cpuFeatures);
    // fphp/asimddp are the cpuinfo spellings of ARMv8.2 fp16 and dotprod.
    boolean hasFp16 = cpuFeatures.contains("fp16") || cpuFeatures.contains("fphp");
    boolean hasDotProd = cpuFeatures.contains("dotprod") || cpuFeatures.contains("asimddp");
    boolean hasSve = cpuFeatures.contains("sve");
    boolean hasI8mm = cpuFeatures.contains("i8mm");
    boolean isAtLeastArmV82 = cpuFeatures.contains("asimd") && cpuFeatures.contains("crc32") && cpuFeatures.contains("aes");
    boolean isAtLeastArmV84 = cpuFeatures.contains("dcpop") && cpuFeatures.contains("uscat");
    Log.d(NAME, "- hasFp16: " + hasFp16);
    Log.d(NAME, "- hasDotProd: " + hasDotProd);
    Log.d(NAME, "- hasSve: " + hasSve);
    Log.d(NAME, "- hasI8mm: " + hasI8mm);
    Log.d(NAME, "- isAtLeastArmV82: " + isAtLeastArmV82);
    Log.d(NAME, "- isAtLeastArmV84: " + isAtLeastArmV84);

    // TODO: Add runtime check for cpu features
    if (LlamaContext.isArm64V8a()) {
      // Branch order encodes preference: dotprod+i8mm > dotprod > i8mm > fp16 > baseline v8.
      if (hasDotProd && hasI8mm) {
        Log.d(NAME, "Loading librnllama_v8_2_dotprod_i8mm.so");
        System.loadLibrary("rnllama_v8_2_dotprod_i8mm");
        loadedLibrary = "rnllama_v8_2_dotprod_i8mm";
      } else if (hasDotProd) {
        Log.d(NAME, "Loading librnllama_v8_2_dotprod.so");
        System.loadLibrary("rnllama_v8_2_dotprod");
        loadedLibrary = "rnllama_v8_2_dotprod";
      } else if (hasI8mm) {
        Log.d(NAME, "Loading librnllama_v8_2_i8mm.so");
        System.loadLibrary("rnllama_v8_2_i8mm");
        loadedLibrary = "rnllama_v8_2_i8mm";
      } else if (hasFp16) {
        Log.d(NAME, "Loading librnllama_v8_2.so");
        System.loadLibrary("rnllama_v8_2");
        loadedLibrary = "rnllama_v8_2";
      } else {
        Log.d(NAME, "Loading default librnllama_v8.so");
        System.loadLibrary("rnllama_v8");
        loadedLibrary = "rnllama_v8";
      }
      // Log.d(NAME, "Loading librnllama_v8_7.so with runtime feature detection");
      // System.loadLibrary("rnllama_v8_7");
    } else if (LlamaContext.isX86_64()) {
      Log.d(NAME, "Loading librnllama_x86_64.so");
      System.loadLibrary("rnllama_x86_64");
      loadedLibrary = "rnllama_x86_64";
    } else {
      // ARM32 devices get no library; any later native call will fail to link.
      Log.d(NAME, "ARM32 is not supported, skipping loading library");
    }
  }
495
+
496
  /** @return true when the device's primary ABI is arm64-v8a. */
  public static boolean isArm64V8a() {
    return Build.SUPPORTED_ABIS[0].equals("arm64-v8a");
  }
499
+
500
  /** @return true when the device's primary ABI is x86_64 (emulators, mainly). */
  private static boolean isX86_64() {
    return Build.SUPPORTED_ABIS[0].equals("x86_64");
  }
503
+
504
+ private static boolean isArchNotSupported() {
505
+ return isArm64V8a() == false && isX86_64() == false;
506
+ }
507
+
508
+ public static String getCpuFeatures() {
509
+ File file = new File("/proc/cpuinfo");
510
+ StringBuilder stringBuilder = new StringBuilder();
511
+ try {
512
+ BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
513
+ String line;
514
+ while ((line = bufferedReader.readLine()) != null) {
515
+ if (line.startsWith("Features")) {
516
+ stringBuilder.append(line);
517
+ break;
518
+ }
519
+ }
520
+ bufferedReader.close();
521
+ return stringBuilder.toString();
522
+ } catch (IOException e) {
523
+ Log.w(NAME, "Couldn't read /proc/cpuinfo", e);
524
+ return "";
525
+ }
526
+ }
527
+
528
+ public void emitModelProgressUpdate(int progress) {
529
+ WritableMap event = Arguments.createMap();
530
+ event.putInt("progress", progress);
531
+ eventEmitter.emit("@RNLlama_onInitContextProgress", event);
532
+ }
533
+
534
  // ---------------------------------------------------------------------------
  // JNI bindings (implemented in jni.cpp). Parameter order is part of the
  // native ABI — keep every declaration in sync with the C++ side.
  // ---------------------------------------------------------------------------

  /** Reads model metadata from a GGUF file without creating a context. */
  protected static native WritableMap modelInfo(
    String model,
    String[] skip
  );
  /** Creates the native context; the constructor treats a return of -1 as failure. */
  protected static native long initContext(
    String model,
    String chat_template,
    String reasoning_format,
    boolean embedding,
    int embd_normalize,
    int n_ctx,
    int n_batch,
    int n_ubatch,
    int n_threads,
    int n_gpu_layers, // TODO: Support this
    boolean flash_attn,
    String cache_type_k,
    String cache_type_v,
    boolean use_mlock,
    boolean use_mmap,
    boolean vocab_only,
    String lora,
    float lora_scaled,
    ReadableArray lora_list,
    float rope_freq_base,
    float rope_freq_scale,
    int pooling_type,
    LoadProgressCallback load_progress_callback
  );
  /** Aborts an in-flight initContext for the given context pointer. */
  protected static native void interruptLoad(long contextPtr);
  protected static native WritableMap loadModelDetails(
    long contextPtr
  );
  protected static native WritableMap getFormattedChatWithJinja(
    long contextPtr,
    String messages,
    String chatTemplate,
    String jsonSchema,
    String tools,
    boolean parallelToolCalls,
    String toolChoice
  );
  protected static native String getFormattedChat(
    long contextPtr,
    String messages,
    String chatTemplate
  );
  // Session persistence.
  protected static native WritableMap loadSession(
    long contextPtr,
    String path
  );
  protected static native int saveSession(
    long contextPtr,
    String path,
    int size
  );
  /** Core completion entry point; completion() documents the Java-side defaults for each parameter. */
  protected static native WritableMap doCompletion(
    long context_ptr,
    String prompt,
    int chat_format,
    String grammar,
    String json_schema,
    boolean grammar_lazy,
    ReadableArray grammar_triggers,
    ReadableArray preserved_tokens,
    float temperature,
    int n_threads,
    int n_predict,
    int n_probs,
    int penalty_last_n,
    float penalty_repeat,
    float penalty_freq,
    float penalty_present,
    float mirostat,
    float mirostat_tau,
    float mirostat_eta,
    int top_k,
    float top_p,
    float min_p,
    float xtc_threshold,
    float xtc_probability,
    float typical_p,
    int seed,
    String[] stop,
    boolean ignore_eos,
    double[][] logit_bias,
    float dry_multiplier,
    float dry_base,
    int dry_allowed_length,
    int dry_penalty_last_n,
    float top_n_sigma,
    String[] dry_sequence_breakers,
    PartialCompletionCallback partial_completion_callback
  );
  protected static native void stopCompletion(long contextPtr);
  protected static native boolean isPredicting(long contextPtr);
  // Tokenization.
  protected static native WritableArray tokenize(long contextPtr, String text);
  protected static native String detokenize(long contextPtr, int[] tokens);
  // Embeddings.
  protected static native boolean isEmbeddingEnabled(long contextPtr);
  protected static native WritableMap embedding(
    long contextPtr,
    String text,
    int embd_normalize
  );
  protected static native String bench(long contextPtr, int pp, int tg, int pl, int nr);
  // LoRA adapter management.
  protected static native int applyLoraAdapters(long contextPtr, ReadableArray loraAdapters);
  protected static native void removeLoraAdapters(long contextPtr);
  protected static native WritableArray getLoadedLoraAdapters(long contextPtr);
  protected static native void freeContext(long contextPtr);
  // Native log routing.
  protected static native void setupLog(NativeLogCallback logCallback);
  protected static native void unsetLog();
645
+ }