cui-llama.rn 1.4.6 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (366)
  1. package/LICENSE +20 -20
  2. package/README.md +317 -319
  3. package/android/build.gradle +116 -116
  4. package/android/gradle.properties +5 -5
  5. package/android/src/main/AndroidManifest.xml +4 -4
  6. package/android/src/main/CMakeLists.txt +124 -117
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
  8. package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
  10. package/android/src/main/jni-utils.h +100 -100
  11. package/android/src/main/jni.cpp +1263 -1245
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  20. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
  21. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
  22. package/cpp/README.md +4 -4
  23. package/cpp/binary-ops.cpp +158 -0
  24. package/cpp/binary-ops.h +16 -0
  25. package/cpp/chat.cpp +1769 -1779
  26. package/cpp/chat.h +9 -1
  27. package/cpp/common.cpp +20 -522
  28. package/cpp/common.h +13 -36
  29. package/cpp/cpu-common.h +72 -0
  30. package/cpp/ggml-common.h +12 -6
  31. package/cpp/ggml-cpu-aarch64.cpp +1557 -80
  32. package/cpp/ggml-cpu-impl.h +2 -21
  33. package/cpp/ggml-cpu-quants.c +904 -405
  34. package/cpp/ggml-cpu.c +909 -13237
  35. package/cpp/ggml-impl.h +50 -23
  36. package/cpp/ggml-llama-sim.metallib +0 -0
  37. package/cpp/ggml-llama.metallib +0 -0
  38. package/cpp/ggml-metal-impl.h +597 -523
  39. package/cpp/ggml-metal.m +798 -580
  40. package/cpp/ggml.c +92 -3
  41. package/cpp/ggml.h +30 -6
  42. package/cpp/gguf.cpp +1 -0
  43. package/cpp/llama-adapter.cpp +55 -20
  44. package/cpp/llama-adapter.h +11 -9
  45. package/cpp/llama-arch.cpp +217 -16
  46. package/cpp/llama-arch.h +25 -0
  47. package/cpp/llama-batch.h +2 -2
  48. package/cpp/llama-chat.cpp +54 -2
  49. package/cpp/llama-chat.h +3 -0
  50. package/cpp/llama-context.cpp +2294 -1238
  51. package/cpp/llama-context.h +214 -77
  52. package/cpp/llama-cparams.h +1 -0
  53. package/cpp/llama-graph.cpp +1695 -0
  54. package/cpp/llama-graph.h +592 -0
  55. package/cpp/llama-hparams.cpp +8 -0
  56. package/cpp/llama-hparams.h +17 -0
  57. package/cpp/llama-io.cpp +15 -0
  58. package/cpp/llama-io.h +35 -0
  59. package/cpp/llama-kv-cache.cpp +965 -303
  60. package/cpp/llama-kv-cache.h +145 -151
  61. package/cpp/llama-memory.cpp +1 -0
  62. package/cpp/llama-memory.h +21 -0
  63. package/cpp/llama-mmap.cpp +1 -1
  64. package/cpp/llama-model-loader.cpp +10 -5
  65. package/cpp/llama-model-loader.h +5 -3
  66. package/cpp/llama-model.cpp +9194 -201
  67. package/cpp/llama-model.h +40 -1
  68. package/cpp/llama-sampling.cpp +5 -0
  69. package/cpp/llama-vocab.cpp +36 -5
  70. package/cpp/llama.cpp +51 -9984
  71. package/cpp/llama.h +102 -22
  72. package/cpp/log.cpp +34 -0
  73. package/cpp/minja/chat-template.hpp +15 -7
  74. package/cpp/minja/minja.hpp +120 -94
  75. package/cpp/ops.cpp +8723 -0
  76. package/cpp/ops.h +128 -0
  77. package/cpp/rn-llama.cpp +873 -882
  78. package/cpp/rn-llama.h +138 -148
  79. package/cpp/sampling.cpp +3 -0
  80. package/cpp/sampling.h +107 -107
  81. package/cpp/sgemm.cpp +533 -88
  82. package/cpp/simd-mappings.h +888 -0
  83. package/cpp/speculative.cpp +4 -4
  84. package/cpp/unary-ops.cpp +186 -0
  85. package/cpp/unary-ops.h +28 -0
  86. package/cpp/unicode-data.cpp +7034 -7034
  87. package/cpp/unicode-data.h +20 -20
  88. package/cpp/unicode.cpp +849 -849
  89. package/cpp/unicode.h +66 -66
  90. package/cpp/vec.cpp +258 -0
  91. package/cpp/vec.h +802 -0
  92. package/ios/CMakeLists.txt +116 -105
  93. package/ios/RNLlama.h +7 -7
  94. package/ios/RNLlama.mm +418 -405
  95. package/ios/RNLlamaContext.h +57 -57
  96. package/ios/RNLlamaContext.mm +835 -819
  97. package/ios/rnllama.xcframework/Info.plist +74 -74
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
  101. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  102. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
  143. package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/chat-template.hpp +15 -7
  144. package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/minja.hpp +120 -94
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  184. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  191. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  192. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  193. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  194. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  195. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  196. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  197. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  198. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  199. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  200. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  201. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  202. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  203. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  204. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  205. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  206. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  207. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  208. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  209. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  210. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  211. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  212. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  213. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  214. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  215. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  216. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  217. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  218. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  225. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  226. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  227. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  228. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  229. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  230. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  231. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  232. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  233. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  234. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  235. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  236. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  237. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  238. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  239. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  240. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
  241. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  242. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  243. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  244. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  245. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  246. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  247. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  248. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  249. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  250. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  251. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  261. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  262. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
  263. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
  264. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  265. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  266. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
  267. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  268. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
  269. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  270. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
  272. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  273. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  274. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
  275. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
  276. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  277. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  278. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  279. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  280. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  281. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  282. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  283. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  284. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  285. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  286. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  287. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  288. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  289. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  290. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  291. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  292. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  293. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  294. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  295. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  296. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  297. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  298. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  299. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  300. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  301. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  302. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  303. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  304. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  305. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  306. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  307. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  308. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  309. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  310. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  311. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  312. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  313. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  314. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  315. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  316. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  317. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  318. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  319. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  320. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  321. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  322. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  323. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  324. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  325. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  326. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  327. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  328. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  329. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  330. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  331. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  332. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  333. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  334. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  335. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  336. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  337. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  338. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  339. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  340. package/jest/mock.js +203 -203
  341. package/lib/commonjs/NativeRNLlama.js +1 -2
  342. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  343. package/lib/commonjs/chat.js.map +1 -1
  344. package/lib/commonjs/grammar.js +12 -31
  345. package/lib/commonjs/grammar.js.map +1 -1
  346. package/lib/commonjs/index.js +47 -47
  347. package/lib/commonjs/index.js.map +1 -1
  348. package/lib/commonjs/package.json +1 -0
  349. package/lib/module/NativeRNLlama.js +2 -0
  350. package/lib/module/NativeRNLlama.js.map +1 -1
  351. package/lib/module/chat.js +2 -0
  352. package/lib/module/chat.js.map +1 -1
  353. package/lib/module/grammar.js +14 -31
  354. package/lib/module/grammar.js.map +1 -1
  355. package/lib/module/index.js +47 -45
  356. package/lib/module/index.js.map +1 -1
  357. package/lib/module/package.json +1 -0
  358. package/lib/typescript/NativeRNLlama.d.ts +6 -4
  359. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  360. package/lib/typescript/index.d.ts.map +1 -1
  361. package/llama-rn.podspec +48 -48
  362. package/package.json +233 -233
  363. package/src/NativeRNLlama.ts +426 -424
  364. package/src/chat.ts +44 -44
  365. package/src/grammar.ts +854 -854
  366. package/src/index.ts +495 -485
@@ -0,0 +1,76 @@
+#pragma once
+
+#include "llama.h"
+
+#include "ggml-cpp.h"
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+// TODO: pimpl
+
+//
+// llama_adapter_cvec
+//
+
+struct llama_adapter_cvec {
+    lm_ggml_tensor * tensor_for(int il) const;
+
+    lm_ggml_tensor * apply_to(lm_ggml_context * ctx, lm_ggml_tensor * cur, int il) const;
+
+    bool apply(
+            const llama_model & model,
+            const float * data,
+            size_t len,
+            int32_t n_embd,
+            int32_t il_start,
+            int32_t il_end);
+
+private:
+    bool init(const llama_model & model);
+
+    int32_t layer_start = -1;
+    int32_t layer_end   = -1;
+
+    std::vector<lm_ggml_context_ptr> ctxs;
+    std::vector<lm_ggml_backend_buffer_ptr> bufs;
+
+    std::vector<lm_ggml_tensor *> tensors; // per layer
+};
+
+//
+// llama_adapter_lora
+//
+
+struct llama_adapter_lora_weight {
+    lm_ggml_tensor * a = nullptr;
+    lm_ggml_tensor * b = nullptr;
+
+    // get actual scale based on rank and alpha
+    float get_scale(float alpha, float adapter_scale) const {
+        const float rank = (float) b->ne[0];
+        const float scale = alpha ? adapter_scale * alpha / rank : adapter_scale;
+        return scale;
+    }
+
+    llama_adapter_lora_weight() = default;
+    llama_adapter_lora_weight(lm_ggml_tensor * a, lm_ggml_tensor * b) : a(a), b(b) {}
+};
+
+struct llama_adapter_lora {
+    // map tensor name to lora_a_b
+    std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;
+
+    std::vector<lm_ggml_context_ptr> ctxs;
+    std::vector<lm_ggml_backend_buffer_ptr> bufs;
+
+    float alpha;
+
+    llama_adapter_lora() = default;
+    ~llama_adapter_lora() = default;
+
+    llama_adapter_lora_weight * get_weight(lm_ggml_tensor * w);
+};
+
+using llama_adapter_loras = std::unordered_map<llama_adapter_lora *, float>;
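
For reference, the scaling rule in llama_adapter_lora_weight::get_scale above works out to adapter_scale * alpha / rank, falling back to the raw adapter_scale when alpha is zero. A minimal standalone sketch of that arithmetic (illustrative only, not code shipped in this package):

    #include <cstdio>

    // Same rule as llama_adapter_lora_weight::get_scale above:
    // rank comes from the adapter's B matrix, alpha from the GGUF metadata.
    static float lora_scale(float alpha, float adapter_scale, float rank) {
        return alpha != 0.0f ? adapter_scale * alpha / rank : adapter_scale;
    }

    int main() {
        std::printf("%.2f\n", lora_scale(16.0f, 1.0f, 8.0f)); // rank-8 adapter, alpha 16 -> 2.00
        std::printf("%.2f\n", lora_scale(0.0f, 0.75f, 8.0f)); // alpha 0 -> raw strength 0.75
        return 0;
    }
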
@@ -0,0 +1,428 @@
+#pragma once
+
+#include "ggml.h" // lm_ggml_op
+
+#include <string>
+
+//
+// gguf constants (sync with gguf.py)
+//
+
+enum llm_arch {
+    LLM_ARCH_LLAMA,
+    LLM_ARCH_LLAMA4,
+    LLM_ARCH_DECI,
+    LLM_ARCH_FALCON,
+    LLM_ARCH_BAICHUAN,
+    LLM_ARCH_GROK,
+    LLM_ARCH_GPT2,
+    LLM_ARCH_GPTJ,
+    LLM_ARCH_GPTNEOX,
+    LLM_ARCH_MPT,
+    LLM_ARCH_STARCODER,
+    LLM_ARCH_REFACT,
+    LLM_ARCH_BERT,
+    LLM_ARCH_NOMIC_BERT,
+    LLM_ARCH_JINA_BERT_V2,
+    LLM_ARCH_BLOOM,
+    LLM_ARCH_STABLELM,
+    LLM_ARCH_QWEN,
+    LLM_ARCH_QWEN2,
+    LLM_ARCH_QWEN2MOE,
+    LLM_ARCH_QWEN2VL,
+    LLM_ARCH_QWEN3,
+    LLM_ARCH_QWEN3MOE,
+    LLM_ARCH_PHI2,
+    LLM_ARCH_PHI3,
+    LLM_ARCH_PHIMOE,
+    LLM_ARCH_PLAMO,
+    LLM_ARCH_CODESHELL,
+    LLM_ARCH_ORION,
+    LLM_ARCH_INTERNLM2,
+    LLM_ARCH_MINICPM,
+    LLM_ARCH_MINICPM3,
+    LLM_ARCH_GEMMA,
+    LLM_ARCH_GEMMA2,
+    LLM_ARCH_GEMMA3,
+    LLM_ARCH_STARCODER2,
+    LLM_ARCH_MAMBA,
+    LLM_ARCH_XVERSE,
+    LLM_ARCH_COMMAND_R,
+    LLM_ARCH_COHERE2,
+    LLM_ARCH_DBRX,
+    LLM_ARCH_OLMO,
+    LLM_ARCH_OLMO2,
+    LLM_ARCH_OLMOE,
+    LLM_ARCH_OPENELM,
+    LLM_ARCH_ARCTIC,
+    LLM_ARCH_DEEPSEEK,
+    LLM_ARCH_DEEPSEEK2,
+    LLM_ARCH_CHATGLM,
+    LLM_ARCH_BITNET,
+    LLM_ARCH_T5,
+    LLM_ARCH_T5ENCODER,
+    LLM_ARCH_JAIS,
+    LLM_ARCH_NEMOTRON,
+    LLM_ARCH_EXAONE,
+    LLM_ARCH_RWKV6,
+    LLM_ARCH_RWKV6QWEN2,
+    LLM_ARCH_RWKV7,
+    LLM_ARCH_ARWKV7,
+    LLM_ARCH_GRANITE,
+    LLM_ARCH_GRANITE_MOE,
+    LLM_ARCH_CHAMELEON,
+    LLM_ARCH_WAVTOKENIZER_DEC,
+    LLM_ARCH_PLM,
+    LLM_ARCH_BAILINGMOE,
+    LLM_ARCH_UNKNOWN,
+};
+
+enum llm_kv {
+    LLM_KV_GENERAL_TYPE,
+    LLM_KV_GENERAL_ARCHITECTURE,
+    LLM_KV_GENERAL_QUANTIZATION_VERSION,
+    LLM_KV_GENERAL_ALIGNMENT,
+    LLM_KV_GENERAL_FILE_TYPE,
+    LLM_KV_GENERAL_NAME,
+    LLM_KV_GENERAL_AUTHOR,
+    LLM_KV_GENERAL_VERSION,
+    LLM_KV_GENERAL_URL,
+    LLM_KV_GENERAL_DESCRIPTION,
+    LLM_KV_GENERAL_LICENSE,
+    LLM_KV_GENERAL_SOURCE_URL,
+    LLM_KV_GENERAL_SOURCE_HF_REPO,
+
+    LLM_KV_VOCAB_SIZE,
+    LLM_KV_CONTEXT_LENGTH,
+    LLM_KV_EMBEDDING_LENGTH,
+    LLM_KV_FEATURES_LENGTH,
+    LLM_KV_BLOCK_COUNT,
+    LLM_KV_LEADING_DENSE_BLOCK_COUNT,
+    LLM_KV_FEED_FORWARD_LENGTH,
+    LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
+    LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
+    LLM_KV_USE_PARALLEL_RESIDUAL,
+    LLM_KV_TENSOR_DATA_LAYOUT,
+    LLM_KV_EXPERT_COUNT,
+    LLM_KV_EXPERT_USED_COUNT,
+    LLM_KV_EXPERT_SHARED_COUNT,
+    LLM_KV_EXPERT_WEIGHTS_SCALE,
+    LLM_KV_EXPERT_WEIGHTS_NORM,
+    LLM_KV_EXPERT_GATING_FUNC,
+    LLM_KV_POOLING_TYPE,
+    LLM_KV_LOGIT_SCALE,
+    LLM_KV_DECODER_START_TOKEN_ID,
+    LLM_KV_ATTN_LOGIT_SOFTCAPPING,
+    LLM_KV_FINAL_LOGIT_SOFTCAPPING,
+    LLM_KV_SWIN_NORM,
+    LLM_KV_RESCALE_EVERY_N_LAYERS,
+    LLM_KV_TIME_MIX_EXTRA_DIM,
+    LLM_KV_TIME_DECAY_EXTRA_DIM,
+    LLM_KV_RESIDUAL_SCALE,
+    LLM_KV_EMBEDDING_SCALE,
+    LLM_KV_TOKEN_SHIFT_COUNT,
+    LLM_KV_INTERLEAVE_MOE_LAYER_STEP,
+
+    LLM_KV_ATTENTION_HEAD_COUNT,
+    LLM_KV_ATTENTION_HEAD_COUNT_KV,
+    LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
+    LLM_KV_ATTENTION_CLAMP_KQV,
+    LLM_KV_ATTENTION_KEY_LENGTH,
+    LLM_KV_ATTENTION_VALUE_LENGTH,
+    LLM_KV_ATTENTION_LAYERNORM_EPS,
+    LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
+    LLM_KV_ATTENTION_GROUPNORM_EPS,
+    LLM_KV_ATTENTION_GROUPNORM_GROUPS,
+    LLM_KV_ATTENTION_CAUSAL,
+    LLM_KV_ATTENTION_Q_LORA_RANK,
+    LLM_KV_ATTENTION_KV_LORA_RANK,
+    LLM_KV_ATTENTION_DECAY_LORA_RANK,
+    LLM_KV_ATTENTION_ICLR_LORA_RANK,
+    LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
+    LLM_KV_ATTENTION_GATE_LORA_RANK,
+    LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
+    LLM_KV_ATTENTION_SLIDING_WINDOW,
+    LLM_KV_ATTENTION_SCALE,
+
+    LLM_KV_ROPE_DIMENSION_COUNT,
+    LLM_KV_ROPE_DIMENSION_SECTIONS,
+    LLM_KV_ROPE_FREQ_BASE,
+    LLM_KV_ROPE_SCALE_LINEAR,
+    LLM_KV_ROPE_SCALING_TYPE,
+    LLM_KV_ROPE_SCALING_FACTOR,
+    LLM_KV_ROPE_SCALING_ATTN_FACTOR,
+    LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
+    LLM_KV_ROPE_SCALING_FINETUNED,
+    LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
+
+    LLM_KV_SPLIT_NO,
+    LLM_KV_SPLIT_COUNT,
+    LLM_KV_SPLIT_TENSORS_COUNT,
+
+    LLM_KV_SSM_INNER_SIZE,
+    LLM_KV_SSM_CONV_KERNEL,
+    LLM_KV_SSM_STATE_SIZE,
+    LLM_KV_SSM_TIME_STEP_RANK,
+    LLM_KV_SSM_DT_B_C_RMS,
+
+    LLM_KV_WKV_HEAD_SIZE,
+
+    LLM_KV_TOKENIZER_MODEL,
+    LLM_KV_TOKENIZER_PRE,
+    LLM_KV_TOKENIZER_LIST,
+    LLM_KV_TOKENIZER_TOKEN_TYPE,
+    LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
+    LLM_KV_TOKENIZER_SCORES,
+    LLM_KV_TOKENIZER_MERGES,
+    LLM_KV_TOKENIZER_BOS_ID,
+    LLM_KV_TOKENIZER_EOS_ID,
+    LLM_KV_TOKENIZER_EOT_ID,
+    LLM_KV_TOKENIZER_EOM_ID,
+    LLM_KV_TOKENIZER_UNK_ID,
+    LLM_KV_TOKENIZER_SEP_ID,
+    LLM_KV_TOKENIZER_PAD_ID,
+    LLM_KV_TOKENIZER_CLS_ID,
+    LLM_KV_TOKENIZER_MASK_ID,
+    LLM_KV_TOKENIZER_ADD_BOS,
+    LLM_KV_TOKENIZER_ADD_EOS,
+    LLM_KV_TOKENIZER_ADD_PREFIX,
+    LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
+    LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
+    LLM_KV_TOKENIZER_HF_JSON,
+    LLM_KV_TOKENIZER_RWKV,
+    LLM_KV_TOKENIZER_CHAT_TEMPLATE,
+    LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
+    LLM_KV_TOKENIZER_FIM_PRE_ID,
+    LLM_KV_TOKENIZER_FIM_SUF_ID,
+    LLM_KV_TOKENIZER_FIM_MID_ID,
+    LLM_KV_TOKENIZER_FIM_PAD_ID,
+    LLM_KV_TOKENIZER_FIM_REP_ID,
+    LLM_KV_TOKENIZER_FIM_SEP_ID,
+
+    LLM_KV_ADAPTER_TYPE,
+    LLM_KV_ADAPTER_LORA_ALPHA,
+
+    LLM_KV_POSNET_EMBEDDING_LENGTH,
+    LLM_KV_POSNET_BLOCK_COUNT,
+
+    LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
+    LLM_KV_CONVNEXT_BLOCK_COUNT,
+
+    // deprecated:
+    LLM_KV_TOKENIZER_PREFIX_ID,
+    LLM_KV_TOKENIZER_SUFFIX_ID,
+    LLM_KV_TOKENIZER_MIDDLE_ID,
+};
+
+enum llm_tensor {
+    LLM_TENSOR_TOKEN_EMBD,
+    LLM_TENSOR_TOKEN_EMBD_NORM,
+    LLM_TENSOR_TOKEN_TYPES,
+    LLM_TENSOR_POS_EMBD,
+    LLM_TENSOR_OUTPUT,
+    LLM_TENSOR_OUTPUT_NORM,
+    LLM_TENSOR_ROPE_FREQS,
+    LLM_TENSOR_ROPE_FACTORS_LONG,
+    LLM_TENSOR_ROPE_FACTORS_SHORT,
+    LLM_TENSOR_ATTN_Q,
+    LLM_TENSOR_ATTN_K,
+    LLM_TENSOR_ATTN_V,
+    LLM_TENSOR_ATTN_QKV,
+    LLM_TENSOR_ATTN_OUT,
+    LLM_TENSOR_ATTN_NORM,
+    LLM_TENSOR_ATTN_NORM_2,
+    LLM_TENSOR_ATTN_OUT_NORM,
+    LLM_TENSOR_ATTN_POST_NORM,
+    LLM_TENSOR_ATTN_ROT_EMBD,
+    LLM_TENSOR_FFN_GATE_INP,
+    LLM_TENSOR_FFN_GATE_INP_SHEXP,
+    LLM_TENSOR_FFN_NORM,
+    LLM_TENSOR_FFN_POST_NORM,
+    LLM_TENSOR_FFN_GATE,
+    LLM_TENSOR_FFN_DOWN,
+    LLM_TENSOR_FFN_UP,
+    LLM_TENSOR_FFN_ACT,
+    LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility
+    LLM_TENSOR_FFN_GATE_EXP,
+    LLM_TENSOR_FFN_UP_EXP,
+    LLM_TENSOR_FFN_NORM_EXPS,
+    LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
+    LLM_TENSOR_FFN_GATE_EXPS,
+    LLM_TENSOR_FFN_UP_EXPS,
+    LLM_TENSOR_FFN_DOWN_SHEXP,
+    LLM_TENSOR_FFN_GATE_SHEXP,
+    LLM_TENSOR_FFN_UP_SHEXP,
+    LLM_TENSOR_FFN_EXP_PROBS_B,
+    LLM_TENSOR_ATTN_Q_NORM,
+    LLM_TENSOR_ATTN_K_NORM,
+    LLM_TENSOR_LAYER_OUT_NORM,
+    LLM_TENSOR_SSM_IN,
+    LLM_TENSOR_SSM_CONV1D,
+    LLM_TENSOR_SSM_X,
+    LLM_TENSOR_SSM_DT,
+    LLM_TENSOR_SSM_A,
+    LLM_TENSOR_SSM_D,
+    LLM_TENSOR_SSM_OUT,
+    LLM_TENSOR_TIME_MIX_W0,
+    LLM_TENSOR_TIME_MIX_W1,
+    LLM_TENSOR_TIME_MIX_W2,
+    LLM_TENSOR_TIME_MIX_A0,
+    LLM_TENSOR_TIME_MIX_A1,
+    LLM_TENSOR_TIME_MIX_A2,
+    LLM_TENSOR_TIME_MIX_V0,
+    LLM_TENSOR_TIME_MIX_V1,
+    LLM_TENSOR_TIME_MIX_V2,
+    LLM_TENSOR_TIME_MIX_G1,
+    LLM_TENSOR_TIME_MIX_G2,
+    LLM_TENSOR_TIME_MIX_K_K,
+    LLM_TENSOR_TIME_MIX_K_A,
+    LLM_TENSOR_TIME_MIX_R_K,
+    LLM_TENSOR_TIME_MIX_LERP_X,
+    LLM_TENSOR_TIME_MIX_LERP_W,
+    LLM_TENSOR_TIME_MIX_LERP_K,
+    LLM_TENSOR_TIME_MIX_LERP_V,
+    LLM_TENSOR_TIME_MIX_LERP_R,
+    LLM_TENSOR_TIME_MIX_LERP_G,
+    LLM_TENSOR_TIME_MIX_LERP_FUSED,
+    LLM_TENSOR_TIME_MIX_FIRST,
+    LLM_TENSOR_TIME_MIX_DECAY,
+    LLM_TENSOR_TIME_MIX_DECAY_W1,
+    LLM_TENSOR_TIME_MIX_DECAY_W2,
+    LLM_TENSOR_TIME_MIX_KEY,
+    LLM_TENSOR_TIME_MIX_VALUE,
+    LLM_TENSOR_TIME_MIX_RECEPTANCE,
+    LLM_TENSOR_TIME_MIX_GATE,
+    LLM_TENSOR_TIME_MIX_LN,
+    LLM_TENSOR_TIME_MIX_OUTPUT,
+    LLM_TENSOR_CHANNEL_MIX_LERP_K,
+    LLM_TENSOR_CHANNEL_MIX_LERP_R,
+    LLM_TENSOR_CHANNEL_MIX_KEY,
+    LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
+    LLM_TENSOR_CHANNEL_MIX_VALUE,
+    LLM_TENSOR_ATTN_Q_A,
+    LLM_TENSOR_ATTN_Q_B,
+    LLM_TENSOR_ATTN_KV_A_MQA,
+    LLM_TENSOR_ATTN_KV_B,
+    LLM_TENSOR_ATTN_Q_A_NORM,
+    LLM_TENSOR_ATTN_KV_A_NORM,
+    LLM_TENSOR_ATTN_SUB_NORM,
+    LLM_TENSOR_FFN_SUB_NORM,
+    LLM_TENSOR_DEC_ATTN_NORM,
+    LLM_TENSOR_DEC_ATTN_Q,
+    LLM_TENSOR_DEC_ATTN_K,
+    LLM_TENSOR_DEC_ATTN_V,
+    LLM_TENSOR_DEC_ATTN_OUT,
+    LLM_TENSOR_DEC_ATTN_REL_B,
+    LLM_TENSOR_DEC_CROSS_ATTN_NORM,
+    LLM_TENSOR_DEC_CROSS_ATTN_Q,
+    LLM_TENSOR_DEC_CROSS_ATTN_K,
+    LLM_TENSOR_DEC_CROSS_ATTN_V,
+    LLM_TENSOR_DEC_CROSS_ATTN_OUT,
+    LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
+    LLM_TENSOR_DEC_FFN_NORM,
+    LLM_TENSOR_DEC_FFN_GATE,
+    LLM_TENSOR_DEC_FFN_DOWN,
+    LLM_TENSOR_DEC_FFN_UP,
+    LLM_TENSOR_DEC_OUTPUT_NORM,
+    LLM_TENSOR_ENC_ATTN_NORM,
+    LLM_TENSOR_ENC_ATTN_Q,
+    LLM_TENSOR_ENC_ATTN_K,
+    LLM_TENSOR_ENC_ATTN_V,
+    LLM_TENSOR_ENC_ATTN_OUT,
+    LLM_TENSOR_ENC_ATTN_REL_B,
+    LLM_TENSOR_ENC_FFN_NORM,
+    LLM_TENSOR_ENC_FFN_GATE,
+    LLM_TENSOR_ENC_FFN_DOWN,
+    LLM_TENSOR_ENC_FFN_UP,
+    LLM_TENSOR_ENC_OUTPUT_NORM,
+    LLM_TENSOR_CLS,
+    LLM_TENSOR_CLS_OUT,
+    LLM_TENSOR_CONV1D,
+    LLM_TENSOR_CONVNEXT_DW,
+    LLM_TENSOR_CONVNEXT_NORM,
+    LLM_TENSOR_CONVNEXT_PW1,
+    LLM_TENSOR_CONVNEXT_PW2,
+    LLM_TENSOR_CONVNEXT_GAMMA,
+    LLM_TENSOR_POS_NET_CONV1,
+    LLM_TENSOR_POS_NET_CONV2,
+    LLM_TENSOR_POS_NET_NORM,
+    LLM_TENSOR_POS_NET_NORM1,
+    LLM_TENSOR_POS_NET_NORM2,
+    LLM_TENSOR_POS_NET_ATTN_NORM,
+    LLM_TENSOR_POS_NET_ATTN_Q,
+    LLM_TENSOR_POS_NET_ATTN_K,
+    LLM_TENSOR_POS_NET_ATTN_V,
+    LLM_TENSOR_POS_NET_ATTN_OUT,
+};
+
+enum llm_tensor_layer {
+    LLM_TENSOR_LAYER_INPUT,
+    LLM_TENSOR_LAYER_REPEATING,
+    LLM_TENSOR_LAYER_OUTPUT,
+};
+
+struct LLM_KV {
+    LLM_KV(llm_arch arch, const char * suffix = nullptr);
+
+    llm_arch arch;
+    const char * suffix;
+
+    std::string operator()(llm_kv kv) const;
+};
+
+// helper to handle gguf constants
+// usage:
+//
+//   const auto tn = LLM_TN(LLM_ARCH_LLAMA);
+//
+//   std::string name = tn(LLM_TENSOR_OUTPUT);                 -> "output"
+//   std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias");     -> "token_embd.bias"
+//   std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
+//
+struct LLM_TN_IMPL {
+    const llm_arch arch;
+    const llm_tensor tensor;
+    const char * const suffix;
+    const int bid;
+    const int xid;
+
+    std::string str() const;
+
+    operator std::string() const {
+        return str();
+    }
+
+    friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
+        return str == tn.str();
+    }
+
+    friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
+        return str != tn.str();
+    }
+};
+
+struct LLM_TN {
+    LLM_TN(llm_arch arch) : arch(arch) {}
+
+    llm_arch arch;
+
+    LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
+        return { arch, tensor, suffix, bid, xid };
+    }
+
+    LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
+        return { arch, tensor, nullptr, bid, xid };
+    }
+};
+
+
+struct llm_tensor_info {
+    llm_tensor_layer layer;
+    lm_ggml_op op;
+};
+
+const char * llm_arch_name(llm_arch arch);
+
+llm_arch llm_arch_from_string(const std::string & name);
+
+const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
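
The LLM_TN/LLM_TN_IMPL helpers above are what turn (tensor, suffix, block id) tuples into GGUF tensor names such as "blk.3.attn_norm.weight". A rough standalone sketch of that naming pattern, based only on the usage comment above (the tensor_name helper below is hypothetical and not the package's implementation, which lives in llama-arch.cpp and varies per architecture):

    #include <cstdio>
    #include <string>

    // Hypothetical helper mirroring the examples in the LLM_TN usage comment:
    // base name, optional ".suffix", and a "blk.<bid>." prefix for per-layer tensors.
    static std::string tensor_name(const std::string & base, const char * suffix = nullptr, int bid = -1) {
        std::string name = bid >= 0 ? "blk." + std::to_string(bid) + "." + base : base;
        if (suffix) {
            name += ".";
            name += suffix;
        }
        return name;
    }

    int main() {
        std::printf("%s\n", tensor_name("output").c_str());                 // output
        std::printf("%s\n", tensor_name("token_embd", "bias").c_str());     // token_embd.bias
        std::printf("%s\n", tensor_name("attn_norm", "weight", 3).c_str()); // blk.3.attn_norm.weight
        return 0;
    }
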
@@ -0,0 +1,88 @@
+#pragma once
+
+#include "llama.h"
+
+#include <array>
+#include <vector>
+
+// very similar to llama_batch,
+// but has more metadata about sequences
+struct llama_ubatch {
+    bool equal_seqs;
+    // TODO: whole_seqs for embeddings?
+
+    uint32_t n_tokens; // total tokens (n_seq_tokens * n_seqs)
+    uint32_t n_seq_tokens; // tokens per sequence
+    uint32_t n_seqs;
+
+    llama_token * token; // [n_tokens]
+    float * embd; // [n_embd, n_tokens]
+    llama_pos * pos; // [n_tokens]
+    int32_t * n_seq_id; // [n_seqs]
+    llama_seq_id ** seq_id; // [n_seqs]
+    int8_t * output; // [n_tokens]
+};
+
+struct llama_sbatch_seq {
+    int32_t n_seq_id;
+
+    llama_seq_id * seq_id;
+
+    size_t offset;
+    size_t length;
+};
+
+// sequence-length-aware batch splitting
+struct llama_sbatch {
+    // tokens left in this batch
+    size_t n_tokens;
+
+    size_t n_embd;
+
+    bool logits_all; // TODO: remove once lctx.logits_all is removed too
+
+    // sorted indices into the batch
+    std::vector<int64_t> ids;
+    // batch indices of the output
+    std::vector<int64_t> out_ids;
+    std::vector<llama_sbatch_seq> seq;
+
+    const llama_batch * batch = nullptr;
+
+    // buffers for the ubatch
+    std::vector<llama_token> ubatch_token;
+    std::vector<float> ubatch_embd;
+    std::vector<llama_pos> ubatch_pos;
+    std::vector<int32_t> ubatch_n_seq_id;
+    std::vector<llama_seq_id *> ubatch_seq_id;
+    std::vector<int8_t> ubatch_output;
+
+    llama_ubatch reserve_ubatch(size_t n_ubatch, bool has_embd = false);
+
+    void add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length);
+
+    // simple split, unknown number of sequences of unequal lengths
+    llama_ubatch split_simple(size_t n_ubatch);
+
+    // make batches of equal-length sequences
+    llama_ubatch split_equal(size_t n_ubatch);
+
+    // sequence-wise split
+    llama_ubatch split_seq(size_t n_ubatch);
+
+    void from_batch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false);
+};
+
+// temporary allocate memory for the input batch if needed
+struct llama_batch_allocr {
+    struct llama_batch batch;
+
+    std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
+    std::vector<llama_pos> pos;
+    std::vector<int32_t> n_seq_id;
+    std::vector<llama_seq_id *> seq_id;
+    std::vector<int8_t> logits;
+
+    // optionally fulfill the batch returned by llama_batch_get_one
+    llama_batch_allocr(struct llama_batch in_batch, llama_pos p0);
+};
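
The split_simple/split_equal/split_seq methods above carve a submitted batch into micro-batches of at most n_ubatch tokens before decoding. A heavily simplified standalone sketch of the split_simple idea, assuming a plain token list and ignoring the per-sequence metadata, embeddings, and output flags that llama_sbatch actually tracks:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Split a flat list of token ids into chunks of at most n_ubatch tokens,
    // in the spirit of llama_sbatch::split_simple (sequence bookkeeping omitted).
    static std::vector<std::vector<int>> split_simple(const std::vector<int> & tokens, size_t n_ubatch) {
        std::vector<std::vector<int>> ubatches;
        for (size_t i = 0; i < tokens.size(); i += n_ubatch) {
            const size_t n = std::min(n_ubatch, tokens.size() - i);
            ubatches.emplace_back(tokens.begin() + i, tokens.begin() + i + n);
        }
        return ubatches;
    }

    int main() {
        const std::vector<int> tokens = {1, 2, 3, 4, 5, 6, 7};
        for (const auto & ub : split_simple(tokens, 3)) {
            std::printf("ubatch of %zu tokens\n", ub.size()); // 3, 3, 1
        }
        return 0;
    }
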
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <cstdint>
+
+enum llm_chat_template {
+    LLM_CHAT_TEMPLATE_CHATML,
+    LLM_CHAT_TEMPLATE_LLAMA_2,
+    LLM_CHAT_TEMPLATE_LLAMA_2_SYS,
+    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS,
+    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP,
+    LLM_CHAT_TEMPLATE_MISTRAL_V1,
+    LLM_CHAT_TEMPLATE_MISTRAL_V3,
+    LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
+    LLM_CHAT_TEMPLATE_MISTRAL_V7,
+    LLM_CHAT_TEMPLATE_PHI_3,
+    LLM_CHAT_TEMPLATE_PHI_4,
+    LLM_CHAT_TEMPLATE_FALCON_3,
+    LLM_CHAT_TEMPLATE_ZEPHYR,
+    LLM_CHAT_TEMPLATE_MONARCH,
+    LLM_CHAT_TEMPLATE_GEMMA,
+    LLM_CHAT_TEMPLATE_ORION,
+    LLM_CHAT_TEMPLATE_OPENCHAT,
+    LLM_CHAT_TEMPLATE_VICUNA,
+    LLM_CHAT_TEMPLATE_VICUNA_ORCA,
+    LLM_CHAT_TEMPLATE_DEEPSEEK,
+    LLM_CHAT_TEMPLATE_DEEPSEEK_2,
+    LLM_CHAT_TEMPLATE_DEEPSEEK_3,
+    LLM_CHAT_TEMPLATE_COMMAND_R,
+    LLM_CHAT_TEMPLATE_LLAMA_3,
+    LLM_CHAT_TEMPLATE_CHATGML_3,
+    LLM_CHAT_TEMPLATE_CHATGML_4,
+    LLM_CHAT_TEMPLATE_GLMEDGE,
+    LLM_CHAT_TEMPLATE_MINICPM,
+    LLM_CHAT_TEMPLATE_EXAONE_3,
+    LLM_CHAT_TEMPLATE_RWKV_WORLD,
+    LLM_CHAT_TEMPLATE_GRANITE,
+    LLM_CHAT_TEMPLATE_GIGACHAT,
+    LLM_CHAT_TEMPLATE_MEGREZ,
+    LLM_CHAT_TEMPLATE_YANDEX,
+    LLM_CHAT_TEMPLATE_BAILING,
+    LLM_CHAT_TEMPLATE_LLAMA4,
+    LLM_CHAT_TEMPLATE_UNKNOWN,
+};
+
+struct llama_chat_message;
+
+llm_chat_template llm_chat_template_from_str(const std::string & name);
+
+llm_chat_template llm_chat_detect_template(const std::string & tmpl);
+
+int32_t llm_chat_apply_template(
+    llm_chat_template tmpl,
+    const std::vector<const llama_chat_message *> & chat,
+    std::string & dest, bool add_ass);
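
As a concrete example of what llm_chat_apply_template produces for the first entry in the enum, ChatML-style templates wrap each message in <|im_start|>role ... <|im_end|> markers and, when add_ass is true, open a trailing assistant turn. A standalone sketch of that rendering (illustrative only; the package's implementation in llama-chat.cpp covers every template listed above, and the chat_message struct below is a stand-in for the forward-declared llama_chat_message):

    #include <cstdio>
    #include <string>
    #include <vector>

    struct chat_message {
        std::string role;
        std::string content;
    };

    // ChatML-style rendering, as selected by LLM_CHAT_TEMPLATE_CHATML above.
    static std::string apply_chatml(const std::vector<chat_message> & chat, bool add_ass) {
        std::string out;
        for (const auto & msg : chat) {
            out += "<|im_start|>" + msg.role + "\n" + msg.content + "<|im_end|>\n";
        }
        if (add_ass) {
            out += "<|im_start|>assistant\n"; // prompt the model to answer next
        }
        return out;
    }

    int main() {
        const std::vector<chat_message> chat = {
            {"system", "You are a helpful assistant."},
            {"user",   "Hello!"},
        };
        std::printf("%s", apply_chatml(chat, /*add_ass=*/true).c_str());
        return 0;
    }
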