cui-llama.rn 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. package/LICENSE +20 -20
  2. package/README.md +345 -319
  3. package/android/build.gradle +116 -116
  4. package/android/gradle.properties +5 -5
  5. package/android/src/main/AndroidManifest.xml +4 -4
  6. package/android/src/main/CMakeLists.txt +129 -124
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +648 -645
  8. package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
  10. package/android/src/main/jni-utils.h +100 -100
  11. package/android/src/main/jni.cpp +1279 -1263
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  20. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
  21. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
  22. package/cpp/LICENSE +21 -0
  23. package/cpp/README.md +4 -4
  24. package/cpp/chat.cpp +1 -1
  25. package/cpp/common.cpp +17 -2
  26. package/cpp/common.h +7 -3
  27. package/cpp/ggml-alloc.c +4 -1
  28. package/cpp/ggml-cpp.h +1 -1
  29. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  30. package/cpp/ggml-cpu/amx/amx.h +8 -0
  31. package/cpp/ggml-cpu/amx/common.h +91 -0
  32. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  33. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  34. package/cpp/{binary-ops.h → ggml-cpu/binary-ops.h} +1 -1
  35. package/cpp/ggml-cpu/common.h +72 -0
  36. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -101
  37. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +109 -42
  38. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +3 -0
  39. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +246 -160
  40. package/cpp/{ops.h → ggml-cpu/ops.h} +2 -20
  41. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  42. package/cpp/{simd-mappings.h → ggml-cpu/simd-mappings.h} +7 -3
  43. package/cpp/{unary-ops.h → ggml-cpu/unary-ops.h} +1 -1
  44. package/cpp/ggml-cpu.h +5 -0
  45. package/cpp/ggml-impl.h +16 -9
  46. package/cpp/ggml-llama-sim.metallib +0 -0
  47. package/cpp/ggml-llama.metallib +0 -0
  48. package/cpp/ggml-metal-impl.h +597 -597
  49. package/cpp/ggml-metal.m +496 -47
  50. package/cpp/ggml.c +134 -244
  51. package/cpp/ggml.h +62 -95
  52. package/cpp/json-schema-to-grammar.cpp +3 -0
  53. package/cpp/llama-arch.cpp +46 -17
  54. package/cpp/llama-arch.h +9 -0
  55. package/cpp/llama-batch.cpp +5 -1
  56. package/cpp/llama-batch.h +2 -1
  57. package/cpp/llama-chat.cpp +31 -10
  58. package/cpp/llama-chat.h +3 -2
  59. package/cpp/llama-context.cpp +104 -489
  60. package/cpp/llama-context.h +14 -30
  61. package/cpp/llama-graph.cpp +69 -62
  62. package/cpp/llama-graph.h +21 -18
  63. package/cpp/llama-hparams.h +5 -0
  64. package/cpp/llama-kv-cache.cpp +1497 -391
  65. package/cpp/llama-kv-cache.h +272 -80
  66. package/cpp/llama-memory.h +11 -1
  67. package/cpp/llama-model.cpp +502 -176
  68. package/cpp/llama-model.h +13 -3
  69. package/cpp/llama-sampling.cpp +2 -1
  70. package/cpp/llama-vocab.cpp +8 -1
  71. package/cpp/llama.h +14 -11
  72. package/cpp/rn-llama.cpp +721 -873
  73. package/cpp/rn-llama.h +134 -138
  74. package/cpp/sampling.h +107 -107
  75. package/cpp/unicode-data.cpp +7034 -7034
  76. package/cpp/unicode-data.h +20 -20
  77. package/cpp/unicode.cpp +849 -849
  78. package/cpp/unicode.h +66 -66
  79. package/ios/CMakeLists.txt +119 -108
  80. package/ios/RNLlama.h +13 -7
  81. package/ios/RNLlama.mm +423 -405
  82. package/ios/RNLlamaContext.h +57 -57
  83. package/ios/RNLlamaContext.mm +833 -835
  84. package/ios/rnllama.xcframework/Info.plist +74 -74
  85. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
  86. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +681 -0
  87. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  88. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  89. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  90. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  91. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  92. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  93. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +143 -0
  94. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +601 -0
  95. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  96. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  97. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2189 -0
  101. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
  102. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +437 -0
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +89 -0
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +57 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +249 -0
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +595 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +161 -0
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +31 -0
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +419 -0
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1437 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +134 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +681 -0
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +143 -0
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +601 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2189 -0
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +437 -0
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +89 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +57 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +249 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +595 -0
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +161 -0
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +31 -0
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +419 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1437 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +134 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  184. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +681 -0
  188. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  189. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  190. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +143 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +601 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2189 -0
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +437 -0
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +89 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +57 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +249 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +595 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +161 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  218. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +31 -0
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +419 -0
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1437 -0
  225. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
  226. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  227. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  228. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +134 -0
  229. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
  230. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
  231. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  232. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
  233. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  234. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  235. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +681 -0
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +143 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +601 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2189 -0
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +437 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +89 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +57 -0
  259. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +249 -0
  260. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +595 -0
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +161 -0
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +405 -0
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +31 -0
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  271. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +419 -0
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  274. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1437 -0
  275. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  276. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  277. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  278. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +134 -0
  279. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  280. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  281. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  282. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  283. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  284. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  285. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  286. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  287. package/jest/mock.js +203 -203
  288. package/lib/commonjs/NativeRNLlama.js +1 -2
  289. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  290. package/lib/commonjs/chat.js.map +1 -1
  291. package/lib/commonjs/grammar.js +12 -31
  292. package/lib/commonjs/grammar.js.map +1 -1
  293. package/lib/commonjs/index.js +47 -47
  294. package/lib/commonjs/index.js.map +1 -1
  295. package/lib/commonjs/package.json +1 -0
  296. package/lib/module/NativeRNLlama.js +2 -0
  297. package/lib/module/NativeRNLlama.js.map +1 -1
  298. package/lib/module/chat.js +2 -0
  299. package/lib/module/chat.js.map +1 -1
  300. package/lib/module/grammar.js +14 -31
  301. package/lib/module/grammar.js.map +1 -1
  302. package/lib/module/index.js +47 -45
  303. package/lib/module/index.js.map +1 -1
  304. package/lib/module/package.json +1 -0
  305. package/lib/typescript/NativeRNLlama.d.ts +10 -4
  306. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  307. package/lib/typescript/index.d.ts.map +1 -1
  308. package/llama-rn.podspec +48 -48
  309. package/package.json +233 -233
  310. package/src/NativeRNLlama.ts +431 -426
  311. package/src/chat.ts +44 -44
  312. package/src/grammar.ts +854 -854
  313. package/src/index.ts +495 -487
  314. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  315. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  316. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  317. /package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +0 -0
  318. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  319. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  320. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  321. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  322. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
  323. /package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -0
  324. /package/cpp/{vec.h → ggml-cpu/vec.h} +0 -0
@@ -0,0 +1,76 @@
1
+ #pragma once
2
+
3
+ #include "llama.h"
4
+
5
+ #include "ggml-cpp.h"
6
+
7
+ #include <string>
8
+ #include <unordered_map>
9
+ #include <vector>
10
+
11
+ // TODO: pimpl
12
+
13
+ //
14
+ // llama_adapter_cvec
15
+ //
16
+
17
+ struct llama_adapter_cvec {
18
+ lm_ggml_tensor * tensor_for(int il) const;
19
+
20
+ lm_ggml_tensor * apply_to(lm_ggml_context * ctx, lm_ggml_tensor * cur, int il) const;
21
+
22
+ bool apply(
23
+ const llama_model & model,
24
+ const float * data,
25
+ size_t len,
26
+ int32_t n_embd,
27
+ int32_t il_start,
28
+ int32_t il_end);
29
+
30
+ private:
31
+ bool init(const llama_model & model);
32
+
33
+ int32_t layer_start = -1;
34
+ int32_t layer_end = -1;
35
+
36
+ std::vector<lm_ggml_context_ptr> ctxs;
37
+ std::vector<lm_ggml_backend_buffer_ptr> bufs;
38
+
39
+ std::vector<lm_ggml_tensor *> tensors; // per layer
40
+ };
41
+
42
+ //
43
+ // llama_adapter_lora
44
+ //
45
+
46
+ struct llama_adapter_lora_weight {
47
+ lm_ggml_tensor * a = nullptr;
48
+ lm_ggml_tensor * b = nullptr;
49
+
50
+ // get actual scale based on rank and alpha
51
+ float get_scale(float alpha, float adapter_scale) const {
52
+ const float rank = (float) b->ne[0];
53
+ const float scale = alpha ? adapter_scale * alpha / rank : adapter_scale;
54
+ return scale;
55
+ }
56
+
57
+ llama_adapter_lora_weight() = default;
58
+ llama_adapter_lora_weight(lm_ggml_tensor * a, lm_ggml_tensor * b) : a(a), b(b) {}
59
+ };
60
+
61
+ struct llama_adapter_lora {
62
+ // map tensor name to lora_a_b
63
+ std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;
64
+
65
+ std::vector<lm_ggml_context_ptr> ctxs;
66
+ std::vector<lm_ggml_backend_buffer_ptr> bufs;
67
+
68
+ float alpha;
69
+
70
+ llama_adapter_lora() = default;
71
+ ~llama_adapter_lora() = default;
72
+
73
+ llama_adapter_lora_weight * get_weight(lm_ggml_tensor * w);
74
+ };
75
+
76
+ using llama_adapter_loras = std::unordered_map<llama_adapter_lora *, float>;
@@ -0,0 +1,437 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h" // lm_ggml_op
4
+
5
+ #include <string>
6
+
7
//
// gguf constants (sync with gguf.py)
//

// Known model architectures. NOTE(review): the enumerator order defines the
// underlying values — append new entries before LLM_ARCH_UNKNOWN rather than
// reordering, in case values are relied upon elsewhere (TODO confirm).
enum llm_arch {
    LLM_ARCH_LLAMA,
    LLM_ARCH_LLAMA4,
    LLM_ARCH_DECI,
    LLM_ARCH_FALCON,
    LLM_ARCH_BAICHUAN,
    LLM_ARCH_GROK,
    LLM_ARCH_GPT2,
    LLM_ARCH_GPTJ,
    LLM_ARCH_GPTNEOX,
    LLM_ARCH_MPT,
    LLM_ARCH_STARCODER,
    LLM_ARCH_REFACT,
    LLM_ARCH_BERT,
    LLM_ARCH_NOMIC_BERT,
    LLM_ARCH_NOMIC_BERT_MOE,
    LLM_ARCH_JINA_BERT_V2,
    LLM_ARCH_BLOOM,
    LLM_ARCH_STABLELM,
    LLM_ARCH_QWEN,
    LLM_ARCH_QWEN2,
    LLM_ARCH_QWEN2MOE,
    LLM_ARCH_QWEN2VL,
    LLM_ARCH_QWEN3,
    LLM_ARCH_QWEN3MOE,
    LLM_ARCH_PHI2,
    LLM_ARCH_PHI3,
    LLM_ARCH_PHIMOE,
    LLM_ARCH_PLAMO,
    LLM_ARCH_CODESHELL,
    LLM_ARCH_ORION,
    LLM_ARCH_INTERNLM2,
    LLM_ARCH_MINICPM,
    LLM_ARCH_MINICPM3,
    LLM_ARCH_GEMMA,
    LLM_ARCH_GEMMA2,
    LLM_ARCH_GEMMA3,
    LLM_ARCH_STARCODER2,
    LLM_ARCH_MAMBA,
    LLM_ARCH_XVERSE,
    LLM_ARCH_COMMAND_R,
    LLM_ARCH_COHERE2,
    LLM_ARCH_DBRX,
    LLM_ARCH_OLMO,
    LLM_ARCH_OLMO2,
    LLM_ARCH_OLMOE,
    LLM_ARCH_OPENELM,
    LLM_ARCH_ARCTIC,
    LLM_ARCH_DEEPSEEK,
    LLM_ARCH_DEEPSEEK2,
    LLM_ARCH_CHATGLM,
    LLM_ARCH_GLM4,
    LLM_ARCH_BITNET,
    LLM_ARCH_T5,
    LLM_ARCH_T5ENCODER,
    LLM_ARCH_JAIS,
    LLM_ARCH_NEMOTRON,
    LLM_ARCH_EXAONE,
    LLM_ARCH_RWKV6,
    LLM_ARCH_RWKV6QWEN2,
    LLM_ARCH_RWKV7,
    LLM_ARCH_ARWKV7,
    LLM_ARCH_GRANITE,
    LLM_ARCH_GRANITE_MOE,
    LLM_ARCH_CHAMELEON,
    LLM_ARCH_WAVTOKENIZER_DEC,
    LLM_ARCH_PLM,
    LLM_ARCH_BAILINGMOE,
    LLM_ARCH_UNKNOWN, // sentinel for unrecognized architecture strings
};
81
+
82
// GGUF metadata key identifiers; mapped to their string form via LLM_KV.
// Grouped by key-name prefix; keep groups together when adding entries.
enum llm_kv {
    // general.* metadata
    LLM_KV_GENERAL_TYPE,
    LLM_KV_GENERAL_ARCHITECTURE,
    LLM_KV_GENERAL_QUANTIZATION_VERSION,
    LLM_KV_GENERAL_ALIGNMENT,
    LLM_KV_GENERAL_FILE_TYPE,
    LLM_KV_GENERAL_NAME,
    LLM_KV_GENERAL_AUTHOR,
    LLM_KV_GENERAL_VERSION,
    LLM_KV_GENERAL_URL,
    LLM_KV_GENERAL_DESCRIPTION,
    LLM_KV_GENERAL_LICENSE,
    LLM_KV_GENERAL_SOURCE_URL,
    LLM_KV_GENERAL_SOURCE_HF_REPO,

    // model hyper-parameters
    LLM_KV_VOCAB_SIZE,
    LLM_KV_CONTEXT_LENGTH,
    LLM_KV_EMBEDDING_LENGTH,
    LLM_KV_FEATURES_LENGTH,
    LLM_KV_BLOCK_COUNT,
    LLM_KV_LEADING_DENSE_BLOCK_COUNT,
    LLM_KV_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
    LLM_KV_USE_PARALLEL_RESIDUAL,
    LLM_KV_TENSOR_DATA_LAYOUT,
    LLM_KV_EXPERT_COUNT,
    LLM_KV_EXPERT_USED_COUNT,
    LLM_KV_EXPERT_SHARED_COUNT,
    LLM_KV_EXPERT_WEIGHTS_SCALE,
    LLM_KV_EXPERT_WEIGHTS_NORM,
    LLM_KV_EXPERT_GATING_FUNC,
    LLM_KV_MOE_EVERY_N_LAYERS,
    LLM_KV_POOLING_TYPE,
    LLM_KV_LOGIT_SCALE,
    LLM_KV_DECODER_START_TOKEN_ID,
    LLM_KV_ATTN_LOGIT_SOFTCAPPING,
    LLM_KV_FINAL_LOGIT_SOFTCAPPING,
    LLM_KV_SWIN_NORM,
    LLM_KV_RESCALE_EVERY_N_LAYERS,
    LLM_KV_TIME_MIX_EXTRA_DIM,
    LLM_KV_TIME_DECAY_EXTRA_DIM,
    LLM_KV_RESIDUAL_SCALE,
    LLM_KV_EMBEDDING_SCALE,
    LLM_KV_TOKEN_SHIFT_COUNT,
    LLM_KV_INTERLEAVE_MOE_LAYER_STEP,

    // attention.* parameters
    LLM_KV_ATTENTION_HEAD_COUNT,
    LLM_KV_ATTENTION_HEAD_COUNT_KV,
    LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
    LLM_KV_ATTENTION_CLAMP_KQV,
    LLM_KV_ATTENTION_KEY_LENGTH,
    LLM_KV_ATTENTION_VALUE_LENGTH,
    LLM_KV_ATTENTION_LAYERNORM_EPS,
    LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
    LLM_KV_ATTENTION_GROUPNORM_EPS,
    LLM_KV_ATTENTION_GROUPNORM_GROUPS,
    LLM_KV_ATTENTION_CAUSAL,
    LLM_KV_ATTENTION_Q_LORA_RANK,
    LLM_KV_ATTENTION_KV_LORA_RANK,
    LLM_KV_ATTENTION_DECAY_LORA_RANK,
    LLM_KV_ATTENTION_ICLR_LORA_RANK,
    LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
    LLM_KV_ATTENTION_GATE_LORA_RANK,
    LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
    LLM_KV_ATTENTION_SLIDING_WINDOW,
    LLM_KV_ATTENTION_SCALE,
    LLM_KV_ATTENTION_KEY_LENGTH_MLA,
    LLM_KV_ATTENTION_VALUE_LENGTH_MLA,

    // rope.* (rotary position embedding) parameters
    LLM_KV_ROPE_DIMENSION_COUNT,
    LLM_KV_ROPE_DIMENSION_SECTIONS,
    LLM_KV_ROPE_FREQ_BASE,
    LLM_KV_ROPE_SCALE_LINEAR,
    LLM_KV_ROPE_SCALING_TYPE,
    LLM_KV_ROPE_SCALING_FACTOR,
    LLM_KV_ROPE_SCALING_ATTN_FACTOR,
    LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
    LLM_KV_ROPE_SCALING_FINETUNED,
    LLM_KV_ROPE_SCALING_YARN_LOG_MUL,

    // split.* (multi-file model) parameters
    LLM_KV_SPLIT_NO,
    LLM_KV_SPLIT_COUNT,
    LLM_KV_SPLIT_TENSORS_COUNT,

    // ssm.* (state-space model) parameters
    LLM_KV_SSM_INNER_SIZE,
    LLM_KV_SSM_CONV_KERNEL,
    LLM_KV_SSM_STATE_SIZE,
    LLM_KV_SSM_TIME_STEP_RANK,
    LLM_KV_SSM_DT_B_C_RMS,

    // wkv.* (RWKV) parameters
    LLM_KV_WKV_HEAD_SIZE,

    // tokenizer.* parameters
    LLM_KV_TOKENIZER_MODEL,
    LLM_KV_TOKENIZER_PRE,
    LLM_KV_TOKENIZER_LIST,
    LLM_KV_TOKENIZER_TOKEN_TYPE,
    LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
    LLM_KV_TOKENIZER_SCORES,
    LLM_KV_TOKENIZER_MERGES,
    LLM_KV_TOKENIZER_BOS_ID,
    LLM_KV_TOKENIZER_EOS_ID,
    LLM_KV_TOKENIZER_EOT_ID,
    LLM_KV_TOKENIZER_EOM_ID,
    LLM_KV_TOKENIZER_UNK_ID,
    LLM_KV_TOKENIZER_SEP_ID,
    LLM_KV_TOKENIZER_PAD_ID,
    LLM_KV_TOKENIZER_CLS_ID,
    LLM_KV_TOKENIZER_MASK_ID,
    LLM_KV_TOKENIZER_ADD_BOS,
    LLM_KV_TOKENIZER_ADD_EOS,
    LLM_KV_TOKENIZER_ADD_PREFIX,
    LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
    LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
    LLM_KV_TOKENIZER_HF_JSON,
    LLM_KV_TOKENIZER_RWKV,
    LLM_KV_TOKENIZER_CHAT_TEMPLATE,
    LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
    LLM_KV_TOKENIZER_FIM_PRE_ID,
    LLM_KV_TOKENIZER_FIM_SUF_ID,
    LLM_KV_TOKENIZER_FIM_MID_ID,
    LLM_KV_TOKENIZER_FIM_PAD_ID,
    LLM_KV_TOKENIZER_FIM_REP_ID,
    LLM_KV_TOKENIZER_FIM_SEP_ID,

    // adapter.* (LoRA / control vector) parameters
    LLM_KV_ADAPTER_TYPE,
    LLM_KV_ADAPTER_LORA_ALPHA,

    LLM_KV_POSNET_EMBEDDING_LENGTH,
    LLM_KV_POSNET_BLOCK_COUNT,

    LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
    LLM_KV_CONVNEXT_BLOCK_COUNT,

    // deprecated:
    LLM_KV_TOKENIZER_PREFIX_ID,
    LLM_KV_TOKENIZER_SUFFIX_ID,
    LLM_KV_TOKENIZER_MIDDLE_ID,
};
221
+
222
// Identifiers for every tensor role a model may contain; mapped to concrete
// GGUF tensor names via LLM_TN / LLM_TN_IMPL.
enum llm_tensor {
    LLM_TENSOR_TOKEN_EMBD,
    LLM_TENSOR_TOKEN_EMBD_NORM,
    LLM_TENSOR_TOKEN_TYPES,
    LLM_TENSOR_POS_EMBD,
    LLM_TENSOR_OUTPUT,
    LLM_TENSOR_OUTPUT_NORM,
    LLM_TENSOR_ROPE_FREQS,
    LLM_TENSOR_ROPE_FACTORS_LONG,
    LLM_TENSOR_ROPE_FACTORS_SHORT,
    LLM_TENSOR_ATTN_Q,
    LLM_TENSOR_ATTN_K,
    LLM_TENSOR_ATTN_V,
    LLM_TENSOR_ATTN_QKV,
    LLM_TENSOR_ATTN_OUT,
    LLM_TENSOR_ATTN_NORM,
    LLM_TENSOR_ATTN_NORM_2,
    LLM_TENSOR_ATTN_OUT_NORM,
    LLM_TENSOR_ATTN_POST_NORM,
    LLM_TENSOR_ATTN_ROT_EMBD,
    LLM_TENSOR_FFN_GATE_INP,
    LLM_TENSOR_FFN_GATE_INP_SHEXP,
    LLM_TENSOR_FFN_NORM,
    LLM_TENSOR_FFN_POST_NORM,
    LLM_TENSOR_FFN_GATE,
    LLM_TENSOR_FFN_DOWN,
    LLM_TENSOR_FFN_UP,
    LLM_TENSOR_FFN_ACT,
    LLM_TENSOR_FFN_DOWN_EXP,  // split experts for backward compatibility
    LLM_TENSOR_FFN_GATE_EXP,
    LLM_TENSOR_FFN_UP_EXP,
    LLM_TENSOR_FFN_NORM_EXPS,
    LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
    LLM_TENSOR_FFN_GATE_EXPS,
    LLM_TENSOR_FFN_UP_EXPS,
    LLM_TENSOR_FFN_DOWN_SHEXP,
    LLM_TENSOR_FFN_GATE_SHEXP,
    LLM_TENSOR_FFN_UP_SHEXP,
    LLM_TENSOR_FFN_EXP_PROBS_B,
    LLM_TENSOR_ATTN_Q_NORM,
    LLM_TENSOR_ATTN_K_NORM,
    LLM_TENSOR_LAYER_OUT_NORM,
    LLM_TENSOR_POST_ATTN_NORM,
    LLM_TENSOR_POST_MLP_NORM,
    // state-space model (Mamba) tensors
    LLM_TENSOR_SSM_IN,
    LLM_TENSOR_SSM_CONV1D,
    LLM_TENSOR_SSM_X,
    LLM_TENSOR_SSM_DT,
    LLM_TENSOR_SSM_A,
    LLM_TENSOR_SSM_D,
    LLM_TENSOR_SSM_OUT,
    // RWKV time-mix / channel-mix tensors
    LLM_TENSOR_TIME_MIX_W0,
    LLM_TENSOR_TIME_MIX_W1,
    LLM_TENSOR_TIME_MIX_W2,
    LLM_TENSOR_TIME_MIX_A0,
    LLM_TENSOR_TIME_MIX_A1,
    LLM_TENSOR_TIME_MIX_A2,
    LLM_TENSOR_TIME_MIX_V0,
    LLM_TENSOR_TIME_MIX_V1,
    LLM_TENSOR_TIME_MIX_V2,
    LLM_TENSOR_TIME_MIX_G1,
    LLM_TENSOR_TIME_MIX_G2,
    LLM_TENSOR_TIME_MIX_K_K,
    LLM_TENSOR_TIME_MIX_K_A,
    LLM_TENSOR_TIME_MIX_R_K,
    LLM_TENSOR_TIME_MIX_LERP_X,
    LLM_TENSOR_TIME_MIX_LERP_W,
    LLM_TENSOR_TIME_MIX_LERP_K,
    LLM_TENSOR_TIME_MIX_LERP_V,
    LLM_TENSOR_TIME_MIX_LERP_R,
    LLM_TENSOR_TIME_MIX_LERP_G,
    LLM_TENSOR_TIME_MIX_LERP_FUSED,
    LLM_TENSOR_TIME_MIX_FIRST,
    LLM_TENSOR_TIME_MIX_DECAY,
    LLM_TENSOR_TIME_MIX_DECAY_W1,
    LLM_TENSOR_TIME_MIX_DECAY_W2,
    LLM_TENSOR_TIME_MIX_KEY,
    LLM_TENSOR_TIME_MIX_VALUE,
    LLM_TENSOR_TIME_MIX_RECEPTANCE,
    LLM_TENSOR_TIME_MIX_GATE,
    LLM_TENSOR_TIME_MIX_LN,
    LLM_TENSOR_TIME_MIX_OUTPUT,
    LLM_TENSOR_CHANNEL_MIX_LERP_K,
    LLM_TENSOR_CHANNEL_MIX_LERP_R,
    LLM_TENSOR_CHANNEL_MIX_KEY,
    LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
    LLM_TENSOR_CHANNEL_MIX_VALUE,
    // multi-head latent attention (MLA) tensors
    LLM_TENSOR_ATTN_Q_A,
    LLM_TENSOR_ATTN_Q_B,
    LLM_TENSOR_ATTN_KV_A_MQA,
    LLM_TENSOR_ATTN_KV_B,
    LLM_TENSOR_ATTN_K_B,
    LLM_TENSOR_ATTN_V_B,
    LLM_TENSOR_ATTN_Q_A_NORM,
    LLM_TENSOR_ATTN_KV_A_NORM,
    LLM_TENSOR_ATTN_SUB_NORM,
    LLM_TENSOR_FFN_SUB_NORM,
    // encoder/decoder (e.g. T5) tensors
    LLM_TENSOR_DEC_ATTN_NORM,
    LLM_TENSOR_DEC_ATTN_Q,
    LLM_TENSOR_DEC_ATTN_K,
    LLM_TENSOR_DEC_ATTN_V,
    LLM_TENSOR_DEC_ATTN_OUT,
    LLM_TENSOR_DEC_ATTN_REL_B,
    LLM_TENSOR_DEC_CROSS_ATTN_NORM,
    LLM_TENSOR_DEC_CROSS_ATTN_Q,
    LLM_TENSOR_DEC_CROSS_ATTN_K,
    LLM_TENSOR_DEC_CROSS_ATTN_V,
    LLM_TENSOR_DEC_CROSS_ATTN_OUT,
    LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
    LLM_TENSOR_DEC_FFN_NORM,
    LLM_TENSOR_DEC_FFN_GATE,
    LLM_TENSOR_DEC_FFN_DOWN,
    LLM_TENSOR_DEC_FFN_UP,
    LLM_TENSOR_DEC_OUTPUT_NORM,
    LLM_TENSOR_ENC_ATTN_NORM,
    LLM_TENSOR_ENC_ATTN_Q,
    LLM_TENSOR_ENC_ATTN_K,
    LLM_TENSOR_ENC_ATTN_V,
    LLM_TENSOR_ENC_ATTN_OUT,
    LLM_TENSOR_ENC_ATTN_REL_B,
    LLM_TENSOR_ENC_FFN_NORM,
    LLM_TENSOR_ENC_FFN_GATE,
    LLM_TENSOR_ENC_FFN_DOWN,
    LLM_TENSOR_ENC_FFN_UP,
    LLM_TENSOR_ENC_OUTPUT_NORM,
    // classification heads
    LLM_TENSOR_CLS,
    LLM_TENSOR_CLS_OUT,
    // convolutional (e.g. WavTokenizer) tensors
    LLM_TENSOR_CONV1D,
    LLM_TENSOR_CONVNEXT_DW,
    LLM_TENSOR_CONVNEXT_NORM,
    LLM_TENSOR_CONVNEXT_PW1,
    LLM_TENSOR_CONVNEXT_PW2,
    LLM_TENSOR_CONVNEXT_GAMMA,
    LLM_TENSOR_POS_NET_CONV1,
    LLM_TENSOR_POS_NET_CONV2,
    LLM_TENSOR_POS_NET_NORM,
    LLM_TENSOR_POS_NET_NORM1,
    LLM_TENSOR_POS_NET_NORM2,
    LLM_TENSOR_POS_NET_ATTN_NORM,
    LLM_TENSOR_POS_NET_ATTN_Q,
    LLM_TENSOR_POS_NET_ATTN_K,
    LLM_TENSOR_POS_NET_ATTN_V,
    LLM_TENSOR_POS_NET_ATTN_OUT,
};
366
+
367
// Coarse position of a tensor inside the model graph: the input embedding
// stage, one of the repeated transformer layers, or the output head.
enum llm_tensor_layer {
    LLM_TENSOR_LAYER_INPUT,
    LLM_TENSOR_LAYER_REPEATING,
    LLM_TENSOR_LAYER_OUTPUT,
};
372
+
373
// Functor that renders an llm_kv identifier into its concrete GGUF key
// string for a given architecture, with an optional suffix appended.
struct LLM_KV {
    LLM_KV(llm_arch arch, const char * suffix = nullptr);

    llm_arch     arch;
    const char * suffix; // optional; nullptr means no suffix

    // e.g. LLM_KV(LLM_ARCH_LLAMA)(LLM_KV_CONTEXT_LENGTH) — exact string
    // format is defined in the implementation
    std::string operator()(llm_kv kv) const;
};
381
+
382
+ // helper to handle gguf constants
383
+ // usage:
384
+ //
385
+ // const auto tn = LLM_TN(LLM_ARCH_LLAMA);
386
+ //
387
+ // std::string name = tn(LLM_TENSOR_OUTPUT); -> "output"
388
+ // std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias"
389
+ // std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
390
+ //
391
+ struct LLM_TN_IMPL {
392
+ const llm_arch arch;
393
+ const llm_tensor tensor;
394
+ const char * const suffix;
395
+ const int bid;
396
+ const int xid;
397
+
398
+ std::string str() const;
399
+
400
+ operator std::string() const {
401
+ return str();
402
+ }
403
+
404
+ friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
405
+ return str == tn.str();
406
+ }
407
+
408
+ friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
409
+ return str != tn.str();
410
+ }
411
+ };
412
+
413
+ struct LLM_TN {
414
+ LLM_TN(llm_arch arch) : arch(arch) {}
415
+
416
+ llm_arch arch;
417
+
418
+ LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
419
+ return { arch, tensor, suffix, bid, xid };
420
+ }
421
+
422
+ LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
423
+ return { arch, tensor, nullptr, bid, xid };
424
+ }
425
+ };
426
+
427
+
428
// Static metadata for a tensor role: where it lives in the model graph and
// the ggml op it participates in.
struct llm_tensor_info {
    llm_tensor_layer layer;
    lm_ggml_op op;
};
432
+
433
// human-readable name for an architecture enum value
const char * llm_arch_name(llm_arch arch);

// parse an architecture string (e.g. from general.architecture);
// presumably returns LLM_ARCH_UNKNOWN for unrecognized names — confirm
llm_arch llm_arch_from_string(const std::string & name);

// static metadata lookup for a tensor role
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
@@ -0,0 +1,89 @@
1
+ #pragma once
2
+
3
+ #include "llama.h"
4
+
5
+ #include <array>
6
+ #include <vector>
7
+
8
// very similar to llama_batch,
// but has more metadata about sequences
//
// Non-owning view over one micro-batch; the pointers reference buffers held
// by the llama_sbatch that produced it.
struct llama_ubatch {
    bool equal_seqs; // true when all sequences have the same token count
    // TODO: whole_seqs for embeddings?

    uint32_t n_tokens;     // total tokens (n_seq_tokens * n_seqs)
    uint32_t n_seq_tokens; // tokens per sequence
    uint32_t n_seqs;

    llama_token  *  token;    // [n_tokens]
    float        *  embd;     // [n_embd, n_tokens]
    llama_pos    *  pos;      // [n_tokens]
    int32_t      *  n_seq_id; // [n_seqs]
    llama_seq_id ** seq_id;   // [n_seqs]
    int8_t       *  output;   // [n_tokens]
};
25
+
26
// One sequence's slice within a sorted batch: its seq-id list plus the
// [offset, offset + length) span into llama_sbatch::ids.
struct llama_sbatch_seq {
    int32_t n_seq_id;

    llama_seq_id * seq_id; // non-owning; points into the source batch

    size_t offset;
    size_t length;
};
34
+
35
// sequence-length-aware batch splitting
//
// Consumes a llama_batch and hands out llama_ubatch views until n_tokens
// reaches zero; the ubatch_* vectors below own the storage those views
// point into.
struct llama_sbatch {
    // tokens left in this batch
    size_t n_tokens;

    size_t n_embd;

    bool logits_all; // TODO: remove once lctx.logits_all is removed too

    // sorted indices into the batch
    std::vector<int64_t> ids;
    // batch indices of the output
    std::vector<int64_t> out_ids;
    std::vector<llama_sbatch_seq> seq;

    // source batch; non-owning — must outlive this sbatch
    const llama_batch * batch = nullptr;

    // buffers for the ubatch
    std::vector<llama_token>    ubatch_token;
    std::vector<float>          ubatch_embd;
    std::vector<llama_pos>      ubatch_pos;
    std::vector<int32_t>        ubatch_n_seq_id;
    std::vector<llama_seq_id *> ubatch_seq_id;
    std::vector<int8_t>         ubatch_output;

    // prepare an empty ubatch with capacity for n_ubatch tokens
    llama_ubatch reserve_ubatch(size_t n_ubatch, bool has_embd = false);

    // copy up to `length` tokens of `seq` into `ubatch`
    void add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length);

    // simple split, unknown number of sequences of unequal lengths
    llama_ubatch split_simple(size_t n_ubatch);

    // make batches of equal-length sequences
    llama_ubatch split_equal(size_t n_ubatch);

    // sequence-wise split
    llama_ubatch split_seq(size_t n_ubatch);

    llama_sbatch() = default;
    llama_sbatch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false);
};
76
+
77
// temporary allocate memory for the input batch if needed
//
// Fills in any fields missing from a minimal batch (e.g. one produced by
// llama_batch_get_one); the vectors below own that backing storage, so the
// allocr must outlive any use of `batch`.
struct llama_batch_allocr {
    struct llama_batch batch; // the (possibly augmented) batch to use

    std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
    std::vector<llama_pos>      pos;
    std::vector<int32_t>        n_seq_id;
    std::vector<llama_seq_id *> seq_id;
    std::vector<int8_t>         logits;

    // optionally fulfill the batch returned by llama_batch_get_one
    // (p0 is presumably the starting position for generated pos values —
    // confirm in the implementation)
    llama_batch_allocr(struct llama_batch in_batch, llama_pos p0);
};
@@ -0,0 +1,57 @@
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <vector>
5
+ #include <cstdint>
6
+
7
// Built-in chat template formats; detected from the Jinja template string by
// llm_chat_detect_template and applied by llm_chat_apply_template.
enum llm_chat_template {
    LLM_CHAT_TEMPLATE_CHATML,
    LLM_CHAT_TEMPLATE_LLAMA_2,
    LLM_CHAT_TEMPLATE_LLAMA_2_SYS,
    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS,
    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP,
    LLM_CHAT_TEMPLATE_MISTRAL_V1,
    LLM_CHAT_TEMPLATE_MISTRAL_V3,
    LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
    LLM_CHAT_TEMPLATE_MISTRAL_V7,
    LLM_CHAT_TEMPLATE_PHI_3,
    LLM_CHAT_TEMPLATE_PHI_4,
    LLM_CHAT_TEMPLATE_FALCON_3,
    LLM_CHAT_TEMPLATE_ZEPHYR,
    LLM_CHAT_TEMPLATE_MONARCH,
    LLM_CHAT_TEMPLATE_GEMMA,
    LLM_CHAT_TEMPLATE_ORION,
    LLM_CHAT_TEMPLATE_OPENCHAT,
    LLM_CHAT_TEMPLATE_VICUNA,
    LLM_CHAT_TEMPLATE_VICUNA_ORCA,
    LLM_CHAT_TEMPLATE_DEEPSEEK,
    LLM_CHAT_TEMPLATE_DEEPSEEK_2,
    LLM_CHAT_TEMPLATE_DEEPSEEK_3,
    LLM_CHAT_TEMPLATE_COMMAND_R,
    LLM_CHAT_TEMPLATE_LLAMA_3,
    LLM_CHAT_TEMPLATE_CHATGLM_3,
    LLM_CHAT_TEMPLATE_CHATGLM_4,
    LLM_CHAT_TEMPLATE_GLMEDGE,
    LLM_CHAT_TEMPLATE_MINICPM,
    LLM_CHAT_TEMPLATE_EXAONE_3,
    LLM_CHAT_TEMPLATE_RWKV_WORLD,
    LLM_CHAT_TEMPLATE_GRANITE,
    LLM_CHAT_TEMPLATE_GIGACHAT,
    LLM_CHAT_TEMPLATE_MEGREZ,
    LLM_CHAT_TEMPLATE_YANDEX,
    LLM_CHAT_TEMPLATE_BAILING,
    LLM_CHAT_TEMPLATE_LLAMA4,
    LLM_CHAT_TEMPLATE_SMOLVLM,
    LLM_CHAT_TEMPLATE_UNKNOWN, // sentinel for unrecognized templates
};
47
+
48
struct llama_chat_message;

// map a template name to its enum value
llm_chat_template llm_chat_template_from_str(const std::string & name);

// detect the template format from a raw (Jinja) template string;
// presumably returns LLM_CHAT_TEMPLATE_UNKNOWN when no pattern matches
llm_chat_template llm_chat_detect_template(const std::string & tmpl);

// render `chat` into `dest` using the given template; add_ass appends the
// assistant turn prompt. Returns a status/length code — confirm the exact
// meaning in the implementation.
int32_t llm_chat_apply_template(
    llm_chat_template tmpl,
    const std::vector<const llama_chat_message *> & chat,
    std::string & dest, bool add_ass);