cui-llama.rn 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/LICENSE +20 -20
  2. package/README.md +345 -319
  3. package/android/build.gradle +116 -116
  4. package/android/gradle.properties +5 -5
  5. package/android/src/main/AndroidManifest.xml +4 -4
  6. package/android/src/main/CMakeLists.txt +129 -124
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +648 -645
  8. package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
  10. package/android/src/main/jni-utils.h +100 -100
  11. package/android/src/main/jni.cpp +1279 -1263
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  20. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
  21. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
  22. package/cpp/LICENSE +21 -0
  23. package/cpp/README.md +4 -4
  24. package/cpp/chat.cpp +1 -1
  25. package/cpp/common.cpp +17 -2
  26. package/cpp/common.h +7 -3
  27. package/cpp/ggml-alloc.c +4 -1
  28. package/cpp/ggml-cpp.h +1 -1
  29. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  30. package/cpp/ggml-cpu/amx/amx.h +8 -0
  31. package/cpp/ggml-cpu/amx/common.h +91 -0
  32. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  33. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  34. package/cpp/{binary-ops.h → ggml-cpu/binary-ops.h} +1 -1
  35. package/cpp/ggml-cpu/common.h +72 -0
  36. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -101
  37. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +109 -42
  38. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +3 -0
  39. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +246 -160
  40. package/cpp/{ops.h → ggml-cpu/ops.h} +2 -20
  41. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  42. package/cpp/{simd-mappings.h → ggml-cpu/simd-mappings.h} +7 -3
  43. package/cpp/{unary-ops.h → ggml-cpu/unary-ops.h} +1 -1
  44. package/cpp/ggml-cpu.h +5 -0
  45. package/cpp/ggml-impl.h +16 -9
  46. package/cpp/ggml-llama-sim.metallib +0 -0
  47. package/cpp/ggml-llama.metallib +0 -0
  48. package/cpp/ggml-metal-impl.h +597 -597
  49. package/cpp/ggml-metal.m +496 -47
  50. package/cpp/ggml.c +134 -244
  51. package/cpp/ggml.h +62 -95
  52. package/cpp/json-schema-to-grammar.cpp +3 -0
  53. package/cpp/llama-arch.cpp +46 -17
  54. package/cpp/llama-arch.h +9 -0
  55. package/cpp/llama-batch.cpp +5 -1
  56. package/cpp/llama-batch.h +2 -1
  57. package/cpp/llama-chat.cpp +31 -10
  58. package/cpp/llama-chat.h +3 -2
  59. package/cpp/llama-context.cpp +104 -489
  60. package/cpp/llama-context.h +14 -30
  61. package/cpp/llama-graph.cpp +69 -62
  62. package/cpp/llama-graph.h +21 -18
  63. package/cpp/llama-hparams.h +5 -0
  64. package/cpp/llama-kv-cache.cpp +1497 -391
  65. package/cpp/llama-kv-cache.h +272 -80
  66. package/cpp/llama-memory.h +11 -1
  67. package/cpp/llama-model.cpp +502 -176
  68. package/cpp/llama-model.h +13 -3
  69. package/cpp/llama-sampling.cpp +2 -1
  70. package/cpp/llama-vocab.cpp +8 -1
  71. package/cpp/llama.h +14 -11
  72. package/cpp/rn-llama.cpp +721 -873
  73. package/cpp/rn-llama.h +134 -138
  74. package/cpp/sampling.h +107 -107
  75. package/cpp/unicode-data.cpp +7034 -7034
  76. package/cpp/unicode-data.h +20 -20
  77. package/cpp/unicode.cpp +849 -849
  78. package/cpp/unicode.h +66 -66
  79. package/ios/CMakeLists.txt +119 -108
  80. package/ios/RNLlama.h +13 -7
  81. package/ios/RNLlama.mm +423 -405
  82. package/ios/RNLlamaContext.h +57 -57
  83. package/ios/RNLlamaContext.mm +833 -835
  84. package/ios/rnllama.xcframework/Info.plist +74 -74
  85. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
  86. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +681 -0
  87. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  88. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  89. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  90. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  91. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  92. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  93. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +143 -0
  94. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +601 -0
  95. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  96. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  97. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2189 -0
  101. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
  102. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +437 -0
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +89 -0
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +57 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +249 -0
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +595 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +161 -0
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +31 -0
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +419 -0
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1437 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +134 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +681 -0
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +143 -0
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +601 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2189 -0
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +437 -0
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +89 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +57 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +249 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +595 -0
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +161 -0
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +31 -0
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +419 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1437 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +134 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  184. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +681 -0
  188. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  189. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  190. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +143 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +601 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2189 -0
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +437 -0
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +89 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +57 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +249 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +595 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +161 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  218. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +31 -0
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +419 -0
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1437 -0
  225. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
  226. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  227. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  228. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +134 -0
  229. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
  230. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
  231. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  232. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
  233. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  234. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  235. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +681 -0
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +143 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +601 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2189 -0
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +437 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +89 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +57 -0
  259. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +249 -0
  260. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +595 -0
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +161 -0
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +31 -0
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  271. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +419 -0
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  274. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1437 -0
  275. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  276. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  277. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  278. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +134 -0
  279. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  280. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  281. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  282. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  283. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  284. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  285. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  286. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  287. package/jest/mock.js +203 -203
  288. package/lib/commonjs/NativeRNLlama.js +1 -2
  289. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  290. package/lib/commonjs/chat.js.map +1 -1
  291. package/lib/commonjs/grammar.js +12 -31
  292. package/lib/commonjs/grammar.js.map +1 -1
  293. package/lib/commonjs/index.js +47 -47
  294. package/lib/commonjs/index.js.map +1 -1
  295. package/lib/commonjs/package.json +1 -0
  296. package/lib/module/NativeRNLlama.js +2 -0
  297. package/lib/module/NativeRNLlama.js.map +1 -1
  298. package/lib/module/chat.js +2 -0
  299. package/lib/module/chat.js.map +1 -1
  300. package/lib/module/grammar.js +14 -31
  301. package/lib/module/grammar.js.map +1 -1
  302. package/lib/module/index.js +47 -45
  303. package/lib/module/index.js.map +1 -1
  304. package/lib/module/package.json +1 -0
  305. package/lib/typescript/NativeRNLlama.d.ts +10 -4
  306. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  307. package/lib/typescript/index.d.ts.map +1 -1
  308. package/llama-rn.podspec +48 -48
  309. package/package.json +233 -233
  310. package/src/NativeRNLlama.ts +431 -426
  311. package/src/chat.ts +44 -44
  312. package/src/grammar.ts +854 -854
  313. package/src/index.ts +495 -487
  314. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  315. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  316. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  317. /package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +0 -0
  318. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  319. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  320. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  321. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  322. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
  323. /package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -0
  324. /package/cpp/{vec.h → ggml-cpu/vec.h} +0 -0
package/cpp/llama-model.h CHANGED
@@ -36,14 +36,17 @@ enum llm_type {
36
36
  LLM_TYPE_335M,
37
37
  LLM_TYPE_410M,
38
38
  LLM_TYPE_450M,
39
+ LLM_TYPE_475M,
39
40
  LLM_TYPE_770M,
40
41
  LLM_TYPE_780M,
41
42
  LLM_TYPE_0_5B,
43
+ LLM_TYPE_0_6B,
42
44
  LLM_TYPE_1B,
43
45
  LLM_TYPE_1_3B,
44
46
  LLM_TYPE_1_4B,
45
47
  LLM_TYPE_1_5B,
46
48
  LLM_TYPE_1_6B,
49
+ LLM_TYPE_1_7B,
47
50
  LLM_TYPE_1_8B,
48
51
  LLM_TYPE_2B,
49
52
  LLM_TYPE_2_8B,
@@ -62,6 +65,7 @@ enum llm_type {
62
65
  LLM_TYPE_15B,
63
66
  LLM_TYPE_16B,
64
67
  LLM_TYPE_20B,
68
+ LLM_TYPE_27B,
65
69
  LLM_TYPE_30B,
66
70
  LLM_TYPE_32B,
67
71
  LLM_TYPE_34B,
@@ -70,6 +74,7 @@ enum llm_type {
70
74
  LLM_TYPE_65B,
71
75
  LLM_TYPE_70B,
72
76
  LLM_TYPE_236B,
77
+ LLM_TYPE_290B,
73
78
  LLM_TYPE_314B,
74
79
  LLM_TYPE_671B,
75
80
  LLM_TYPE_SMALL,
@@ -84,10 +89,10 @@ enum llm_type {
84
89
  LLM_TYPE_16x3_8B,
85
90
  LLM_TYPE_10B_128x3_66B,
86
91
  LLM_TYPE_57B_A14B,
87
- LLM_TYPE_27B,
88
- LLM_TYPE_290B,
89
92
  LLM_TYPE_17B_16E, // llama4 Scout
90
93
  LLM_TYPE_17B_128E, // llama4 Maverick
94
+ LLM_TYPE_30B_A3B,
95
+ LLM_TYPE_235B_A22B,
91
96
  };
92
97
 
93
98
  struct llama_layer_posnet {
@@ -171,6 +176,8 @@ struct llama_layer {
171
176
  struct lm_ggml_tensor * wq_b = nullptr;
172
177
  struct lm_ggml_tensor * wkv_a_mqa = nullptr;
173
178
  struct lm_ggml_tensor * wkv_b = nullptr;
179
+ struct lm_ggml_tensor * wk_b = nullptr;
180
+ struct lm_ggml_tensor * wv_b = nullptr;
174
181
  struct lm_ggml_tensor * wq_cross = nullptr;
175
182
  struct lm_ggml_tensor * wk_cross = nullptr;
176
183
  struct lm_ggml_tensor * wv_cross = nullptr;
@@ -388,8 +395,11 @@ struct llama_model {
388
395
 
389
396
  const struct lm_ggml_tensor * get_tensor(const char * name) const;
390
397
 
398
+ lm_ggml_tensor * get_rope_factors(uint32_t n_ctx_per_seq, int il) const;
399
+
400
+ // note: can mutate `cparams`
391
401
  // TODO: move this to new llm_arch_model_i interface
392
- llama_memory_i * create_memory() const; // TODO: params
402
+ llama_memory_i * create_memory(const llama_memory_params & params, llama_cparams & cparams) const;
393
403
 
394
404
  // TODO: move this to new llm_arch_model_i interface
395
405
  llm_graph_result_ptr build_graph(
@@ -232,7 +232,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
232
232
  // }
233
233
 
234
234
  if (k <= 0) {
235
- k = cur_p->size;
235
+ return;
236
236
  }
237
237
 
238
238
  k = std::min(k, (int) cur_p->size);
@@ -298,6 +298,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
298
298
  }
299
299
  cur_p->sorted = true;
300
300
  }
301
+
301
302
  cur_p->size = k;
302
303
  }
303
304
 
@@ -1506,7 +1506,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
1506
1506
  tokenizer_pre == "llama3" ||
1507
1507
  tokenizer_pre == "llama-v3" ||
1508
1508
  tokenizer_pre == "llama-bpe"||
1509
- tokenizer_pre == "falcon3") {
1509
+ tokenizer_pre == "falcon3" ||
1510
+ tokenizer_pre == "pixtral") {
1510
1511
  pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
1511
1512
  ignore_merges = true;
1512
1513
  add_bos = true;
@@ -1572,6 +1573,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
1572
1573
  pre_type = LLAMA_VOCAB_PRE_TYPE_PORO;
1573
1574
  clean_spaces = false;
1574
1575
  } else if (
1576
+ tokenizer_pre == "glm4" ||
1575
1577
  tokenizer_pre == "chatglm-bpe") {
1576
1578
  pre_type = LLAMA_VOCAB_PRE_TYPE_CHATGLM4;
1577
1579
  special_bos_id = LLAMA_TOKEN_NULL;
@@ -1840,6 +1842,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
1840
1842
  if (false
1841
1843
  || t.first == "<|fim_prefix|>" // Qwen
1842
1844
  || t.first == "<fim-prefix>"
1845
+ || t.first == "<fim_prefix>" // Granite
1843
1846
  || t.first == "<|fim▁begin|>" // DeepSeek
1844
1847
  || t.first == "<PRE>"
1845
1848
  || t.first == "▁<PRE>" // CodeLlama
@@ -1858,6 +1861,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
1858
1861
  if (false
1859
1862
  || t.first == "<|fim_suffix|>" // Qwen
1860
1863
  || t.first == "<fim-suffix>"
1864
+ || t.first == "<fim_suffix>" // Granite
1861
1865
  || t.first == "<|fim▁hole|>" // DeepSeek
1862
1866
  || t.first == "<SUF>"
1863
1867
  || t.first == "▁<SUF>" // CodeLlama
@@ -1876,6 +1880,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
1876
1880
  if (false
1877
1881
  || t.first == "<|fim_middle|>" // Qwen
1878
1882
  || t.first == "<fim-middle>"
1883
+ || t.first == "<fim_middle>" // Granite
1879
1884
  || t.first == "<|fim▁end|>" // DeepSeek
1880
1885
  || t.first == "<MID>"
1881
1886
  || t.first == "▁<MID>" // CodeLlama
@@ -1894,6 +1899,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
1894
1899
  if (false
1895
1900
  || t.first == "<|fim_pad|>" // Qwen
1896
1901
  || t.first == "<fim-pad>"
1902
+ || t.first == "<fim_pad>" // Granite
1897
1903
  || t.first == "<PAD>"
1898
1904
  ) {
1899
1905
  special_fim_pad_id = t.second;
@@ -1912,6 +1918,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
1912
1918
  || t.first == "<|repo_name|>"
1913
1919
  || t.first == "<fim-repo>"
1914
1920
  || t.first == "<REPO>"
1921
+ || t.first == "<reponame>" // Granite
1915
1922
  ) {
1916
1923
  special_fim_rep_id = t.second;
1917
1924
  if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
package/cpp/llama.h CHANGED
@@ -112,6 +112,7 @@ extern "C" {
112
112
  LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
113
113
  LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
114
114
  LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
115
+ LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
115
116
  };
116
117
 
117
118
  enum llama_rope_type {
@@ -368,17 +369,18 @@ extern "C" {
368
369
 
369
370
  // model quantization parameters
370
371
  typedef struct llama_model_quantize_params {
371
- int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
372
- enum llama_ftype ftype; // quantize to this llama_ftype
373
- enum lm_ggml_type output_tensor_type; // output tensor type
374
- enum lm_ggml_type token_embedding_type; // token embeddings tensor type
375
- bool allow_requantize; // allow quantizing non-f32/f16 tensors
376
- bool quantize_output_tensor; // quantize output.weight
377
- bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
378
- bool pure; // quantize all tensors to the default type
379
- bool keep_split; // quantize to the same number of shards
380
- void * imatrix; // pointer to importance matrix data
381
- void * kv_overrides; // pointer to vector containing overrides
372
+ int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
373
+ enum llama_ftype ftype; // quantize to this llama_ftype
374
+ enum lm_ggml_type output_tensor_type; // output tensor type
375
+ enum lm_ggml_type token_embedding_type; // token embeddings tensor type
376
+ bool allow_requantize; // allow quantizing non-f32/f16 tensors
377
+ bool quantize_output_tensor; // quantize output.weight
378
+ bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
379
+ bool pure; // quantize all tensors to the default type
380
+ bool keep_split; // quantize to the same number of shards
381
+ void * imatrix; // pointer to importance matrix data
382
+ void * kv_overrides; // pointer to vector containing overrides
383
+ void * tensor_types; // pointer to vector containing tensor types
382
384
  } llama_model_quantize_params;
383
385
 
384
386
  typedef struct llama_logit_bias {
@@ -1231,6 +1233,7 @@ extern "C" {
1231
1233
  "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)");
1232
1234
 
1233
1235
  /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
1236
+ /// Setting k <= 0 makes this a noop
1234
1237
  LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);
1235
1238
 
1236
1239
  /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751