cui-llama.rn 1.4.6 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (366)
  1. package/LICENSE +20 -20
  2. package/README.md +317 -319
  3. package/android/build.gradle +116 -116
  4. package/android/gradle.properties +5 -5
  5. package/android/src/main/AndroidManifest.xml +4 -4
  6. package/android/src/main/CMakeLists.txt +124 -117
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
  8. package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
  10. package/android/src/main/jni-utils.h +100 -100
  11. package/android/src/main/jni.cpp +1263 -1245
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  20. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
  21. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
  22. package/cpp/README.md +4 -4
  23. package/cpp/binary-ops.cpp +158 -0
  24. package/cpp/binary-ops.h +16 -0
  25. package/cpp/chat.cpp +1769 -1779
  26. package/cpp/chat.h +9 -1
  27. package/cpp/common.cpp +20 -522
  28. package/cpp/common.h +13 -36
  29. package/cpp/cpu-common.h +72 -0
  30. package/cpp/ggml-common.h +12 -6
  31. package/cpp/ggml-cpu-aarch64.cpp +1557 -80
  32. package/cpp/ggml-cpu-impl.h +2 -21
  33. package/cpp/ggml-cpu-quants.c +904 -405
  34. package/cpp/ggml-cpu.c +909 -13237
  35. package/cpp/ggml-impl.h +50 -23
  36. package/cpp/ggml-llama-sim.metallib +0 -0
  37. package/cpp/ggml-llama.metallib +0 -0
  38. package/cpp/ggml-metal-impl.h +597 -523
  39. package/cpp/ggml-metal.m +798 -580
  40. package/cpp/ggml.c +92 -3
  41. package/cpp/ggml.h +30 -6
  42. package/cpp/gguf.cpp +1 -0
  43. package/cpp/llama-adapter.cpp +55 -20
  44. package/cpp/llama-adapter.h +11 -9
  45. package/cpp/llama-arch.cpp +217 -16
  46. package/cpp/llama-arch.h +25 -0
  47. package/cpp/llama-batch.h +2 -2
  48. package/cpp/llama-chat.cpp +54 -2
  49. package/cpp/llama-chat.h +3 -0
  50. package/cpp/llama-context.cpp +2294 -1238
  51. package/cpp/llama-context.h +214 -77
  52. package/cpp/llama-cparams.h +1 -0
  53. package/cpp/llama-graph.cpp +1695 -0
  54. package/cpp/llama-graph.h +592 -0
  55. package/cpp/llama-hparams.cpp +8 -0
  56. package/cpp/llama-hparams.h +17 -0
  57. package/cpp/llama-io.cpp +15 -0
  58. package/cpp/llama-io.h +35 -0
  59. package/cpp/llama-kv-cache.cpp +965 -303
  60. package/cpp/llama-kv-cache.h +145 -151
  61. package/cpp/llama-memory.cpp +1 -0
  62. package/cpp/llama-memory.h +21 -0
  63. package/cpp/llama-mmap.cpp +1 -1
  64. package/cpp/llama-model-loader.cpp +10 -5
  65. package/cpp/llama-model-loader.h +5 -3
  66. package/cpp/llama-model.cpp +9194 -201
  67. package/cpp/llama-model.h +40 -1
  68. package/cpp/llama-sampling.cpp +5 -0
  69. package/cpp/llama-vocab.cpp +36 -5
  70. package/cpp/llama.cpp +51 -9984
  71. package/cpp/llama.h +102 -22
  72. package/cpp/log.cpp +34 -0
  73. package/cpp/minja/chat-template.hpp +15 -7
  74. package/cpp/minja/minja.hpp +120 -94
  75. package/cpp/ops.cpp +8723 -0
  76. package/cpp/ops.h +128 -0
  77. package/cpp/rn-llama.cpp +873 -882
  78. package/cpp/rn-llama.h +138 -148
  79. package/cpp/sampling.cpp +3 -0
  80. package/cpp/sampling.h +107 -107
  81. package/cpp/sgemm.cpp +533 -88
  82. package/cpp/simd-mappings.h +888 -0
  83. package/cpp/speculative.cpp +4 -4
  84. package/cpp/unary-ops.cpp +186 -0
  85. package/cpp/unary-ops.h +28 -0
  86. package/cpp/unicode-data.cpp +7034 -7034
  87. package/cpp/unicode-data.h +20 -20
  88. package/cpp/unicode.cpp +849 -849
  89. package/cpp/unicode.h +66 -66
  90. package/cpp/vec.cpp +258 -0
  91. package/cpp/vec.h +802 -0
  92. package/ios/CMakeLists.txt +116 -105
  93. package/ios/RNLlama.h +7 -7
  94. package/ios/RNLlama.mm +418 -405
  95. package/ios/RNLlamaContext.h +57 -57
  96. package/ios/RNLlamaContext.mm +835 -819
  97. package/ios/rnllama.xcframework/Info.plist +74 -74
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
  101. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  102. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
  143. package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/chat-template.hpp +15 -7
  144. package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/minja.hpp +120 -94
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  184. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  191. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  192. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  193. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  194. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  195. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  196. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  197. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  198. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  199. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  200. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  201. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  202. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  203. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  204. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  205. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  206. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  207. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  208. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  209. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  210. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  211. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  212. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  213. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  214. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  215. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  216. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  217. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  218. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  225. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
  226. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
  227. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
  228. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  229. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  230. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  231. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  232. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
  233. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
  234. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  235. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
  236. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
  237. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
  238. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
  239. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
  240. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
  241. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  242. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
  243. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
  244. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
  245. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
  246. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
  247. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
  248. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
  249. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
  250. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
  251. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
  261. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
  262. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
  263. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
  264. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  265. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  266. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
  267. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
  268. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
  269. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
  270. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
  272. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
  273. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
  274. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
  275. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
  276. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  277. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  278. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  279. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
  280. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
  281. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
  282. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
  283. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
  284. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
  285. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
  286. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
  287. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
  288. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
  289. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
  290. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
  291. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
  292. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
  293. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
  294. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
  295. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
  296. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
  297. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
  298. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
  299. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
  300. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
  301. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
  302. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
  303. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
  304. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
  305. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
  306. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
  307. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
  308. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
  309. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
  310. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
  311. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
  312. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
  313. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
  314. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
  315. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
  316. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
  317. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
  318. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
  319. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
  320. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
  321. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
  322. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
  323. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
  324. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
  325. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
  326. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
  327. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
  328. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
  329. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
  330. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
  331. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
  332. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
  333. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
  334. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
  335. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
  336. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  337. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
  338. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  339. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  340. package/jest/mock.js +203 -203
  341. package/lib/commonjs/NativeRNLlama.js +1 -2
  342. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  343. package/lib/commonjs/chat.js.map +1 -1
  344. package/lib/commonjs/grammar.js +12 -31
  345. package/lib/commonjs/grammar.js.map +1 -1
  346. package/lib/commonjs/index.js +47 -47
  347. package/lib/commonjs/index.js.map +1 -1
  348. package/lib/commonjs/package.json +1 -0
  349. package/lib/module/NativeRNLlama.js +2 -0
  350. package/lib/module/NativeRNLlama.js.map +1 -1
  351. package/lib/module/chat.js +2 -0
  352. package/lib/module/chat.js.map +1 -1
  353. package/lib/module/grammar.js +14 -31
  354. package/lib/module/grammar.js.map +1 -1
  355. package/lib/module/index.js +47 -45
  356. package/lib/module/index.js.map +1 -1
  357. package/lib/module/package.json +1 -0
  358. package/lib/typescript/NativeRNLlama.d.ts +6 -4
  359. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  360. package/lib/typescript/index.d.ts.map +1 -1
  361. package/llama-rn.podspec +48 -48
  362. package/package.json +233 -233
  363. package/src/NativeRNLlama.ts +426 -424
  364. package/src/chat.ts +44 -44
  365. package/src/grammar.ts +854 -854
  366. package/src/index.ts +495 -485
@@ -6,6 +6,7 @@
 
 static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_LLAMA, "llama" },
+    { LLM_ARCH_LLAMA4, "llama4" },
     { LLM_ARCH_DECI, "deci" },
     { LLM_ARCH_FALCON, "falcon" },
     { LLM_ARCH_GROK, "grok" },
@@ -25,6 +26,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_QWEN2, "qwen2" },
     { LLM_ARCH_QWEN2MOE, "qwen2moe" },
     { LLM_ARCH_QWEN2VL, "qwen2vl" },
+    { LLM_ARCH_QWEN3, "qwen3" },
+    { LLM_ARCH_QWEN3MOE, "qwen3moe" },
     { LLM_ARCH_PHI2, "phi2" },
     { LLM_ARCH_PHI3, "phi3" },
     { LLM_ARCH_PHIMOE, "phimoe" },
@@ -59,10 +62,14 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
59
62
  { LLM_ARCH_EXAONE, "exaone" },
60
63
  { LLM_ARCH_RWKV6, "rwkv6" },
61
64
  { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
65
+ { LLM_ARCH_RWKV7, "rwkv7" },
66
+ { LLM_ARCH_ARWKV7, "arwkv7" },
62
67
  { LLM_ARCH_GRANITE, "granite" },
63
68
  { LLM_ARCH_GRANITE_MOE, "granitemoe" },
64
69
  { LLM_ARCH_CHAMELEON, "chameleon" },
65
70
  { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
71
+ { LLM_ARCH_PLM, "plm" },
72
+ { LLM_ARCH_BAILINGMOE, "bailingmoe" },
66
73
  { LLM_ARCH_UNKNOWN, "(unknown)" },
67
74
  };
68
75
 
@@ -71,6 +78,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
71
78
  { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
72
79
  { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
73
80
  { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
81
+ { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" },
74
82
  { LLM_KV_GENERAL_NAME, "general.name" },
75
83
  { LLM_KV_GENERAL_AUTHOR, "general.author" },
76
84
  { LLM_KV_GENERAL_VERSION, "general.version" },
@@ -109,23 +117,28 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
109
117
  { LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
110
118
  { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
111
119
  { LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },
120
+ { LLM_KV_INTERLEAVE_MOE_LAYER_STEP, "%s.interleave_moe_layer_step" },
112
121
 
113
- { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
114
- { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
115
- { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
116
- { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
117
- { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
118
- { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
119
- { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
120
- { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
121
- { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
122
- { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
123
- { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
124
- { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
125
- { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
126
- { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
127
- { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
128
- { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
122
+ { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
123
+ { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
124
+ { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
125
+ { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
126
+ { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
127
+ { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
128
+ { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
129
+ { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
130
+ { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
131
+ { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
132
+ { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
133
+ { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
134
+ { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
135
+ { LLM_KV_ATTENTION_DECAY_LORA_RANK, "%s.attention.decay_lora_rank" },
136
+ { LLM_KV_ATTENTION_ICLR_LORA_RANK, "%s.attention.iclr_lora_rank" },
137
+ { LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, "%s.attention.value_residual_mix_lora_rank" },
138
+ { LLM_KV_ATTENTION_GATE_LORA_RANK, "%s.attention.gate_lora_rank" },
139
+ { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
140
+ { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
141
+ { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
129
142
 
130
143
  { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
131
144
  { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
@@ -224,6 +237,35 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
224
237
  { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
225
238
  },
226
239
  },
240
+ {
241
+ LLM_ARCH_LLAMA4,
242
+ {
243
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
244
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
245
+ { LLM_TENSOR_OUTPUT, "output" },
246
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
247
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
248
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
249
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
250
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
251
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
252
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
253
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
254
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
255
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
256
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
257
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
258
+ { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
259
+ { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
260
+ { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
261
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
262
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
263
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
264
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
265
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
266
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
267
+ },
268
+ },
227
269
  {
228
270
  LLM_ARCH_DECI,
229
271
  {
@@ -555,6 +597,45 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
555
597
  { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
556
598
  },
557
599
  },
600
+ {
601
+ LLM_ARCH_QWEN3,
602
+ {
603
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
604
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
605
+ { LLM_TENSOR_OUTPUT, "output" },
606
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
607
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
608
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
609
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
610
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
611
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
612
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
613
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
614
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
615
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
616
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
617
+ },
618
+ },
619
+ {
620
+ LLM_ARCH_QWEN3MOE,
621
+ {
622
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
623
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
624
+ { LLM_TENSOR_OUTPUT, "output" },
625
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
626
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
627
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
628
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
629
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
630
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
631
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
632
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
633
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
634
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
635
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
636
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
637
+ },
638
+ },
558
639
  {
559
640
  LLM_ARCH_PHI2,
560
641
  {
@@ -772,6 +853,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
772
853
  {
773
854
  { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
774
855
  { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
856
+ { LLM_TENSOR_OUTPUT, "output" },
775
857
  { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
776
858
  { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
777
859
  { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
@@ -1036,6 +1118,22 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
1036
1118
  { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1037
1119
  },
1038
1120
  },
1121
+ {
1122
+ LLM_ARCH_PLM,
1123
+ {
1124
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1125
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1126
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1127
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1128
+ { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
1129
+ { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
1130
+ { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
1131
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1132
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1133
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1134
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1135
+ },
1136
+ },
1039
1137
  {
1040
1138
  LLM_ARCH_CHATGLM,
1041
1139
  {
@@ -1238,6 +1336,74 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
1238
1336
  { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1239
1337
  },
1240
1338
  },
1339
+ {
1340
+ LLM_ARCH_RWKV7,
1341
+ {
1342
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1343
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1344
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1345
+ { LLM_TENSOR_OUTPUT, "output" },
1346
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1347
+ { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
1348
+ { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
1349
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1350
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1351
+ { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
1352
+ { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
1353
+ { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
1354
+ { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
1355
+ { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
1356
+ { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
1357
+ { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
1358
+ { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
1359
+ { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
1360
+ { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
1361
+ { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
1362
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1363
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1364
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1365
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1366
+ { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1367
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1368
+ { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
1369
+ { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
1370
+ { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
1371
+ },
1372
+ },
1373
+ {
1374
+ LLM_ARCH_ARWKV7,
1375
+ {
1376
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1377
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1378
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1379
+ { LLM_TENSOR_OUTPUT, "output" },
1380
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1381
+ { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
1382
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1383
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1384
+ { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
1385
+ { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
1386
+ { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
1387
+ { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
1388
+ { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
1389
+ { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
1390
+ { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
1391
+ { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
1392
+ { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
1393
+ { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
1394
+ { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
1395
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1396
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1397
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1398
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1399
+ { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1400
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1401
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1402
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1403
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1404
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1405
+ },
1406
+ },
1241
1407
  {
1242
1408
  LLM_ARCH_GRANITE,
1243
1409
  {
@@ -1317,6 +1483,29 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
1317
1483
  { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
1318
1484
  },
1319
1485
  },
1486
+ {
1487
+ LLM_ARCH_BAILINGMOE,
1488
+ {
1489
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1490
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1491
+ { LLM_TENSOR_OUTPUT, "output" },
1492
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1493
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1494
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1495
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1496
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1497
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1498
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1499
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1500
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1501
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1502
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1503
+ { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1504
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1505
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1506
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1507
+ },
1508
+ },
1320
1509
  {
1321
1510
  LLM_ARCH_UNKNOWN,
1322
1511
  {
@@ -1397,6 +1586,12 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1397
1586
  {LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1398
1587
  {LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1399
1588
  {LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1589
+ {LLM_TENSOR_TIME_MIX_A1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1590
+ {LLM_TENSOR_TIME_MIX_A2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1591
+ {LLM_TENSOR_TIME_MIX_V1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1592
+ {LLM_TENSOR_TIME_MIX_V2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1593
+ {LLM_TENSOR_TIME_MIX_G1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1594
+ {LLM_TENSOR_TIME_MIX_G2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1400
1595
  {LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1401
1596
  {LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1402
1597
  {LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
@@ -1415,6 +1610,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1415
1610
  {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1416
1611
  {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1417
1612
  {LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1613
+ {LLM_TENSOR_TIME_MIX_K_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1614
+ {LLM_TENSOR_TIME_MIX_K_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1615
+ {LLM_TENSOR_TIME_MIX_R_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1418
1616
  {LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1419
1617
  {LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1420
1618
  {LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
@@ -1422,6 +1620,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1422
1620
  {LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1423
1621
  {LLM_TENSOR_TIME_MIX_LERP_FUSED, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1424
1622
  {LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1623
+ {LLM_TENSOR_TIME_MIX_W0, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1624
+ {LLM_TENSOR_TIME_MIX_A0, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1625
+ {LLM_TENSOR_TIME_MIX_V0, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1425
1626
  {LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_RWKV_WKV6}},
1426
1627
  {LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1427
1628
  {LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
package/cpp/llama-arch.h CHANGED
@@ -10,6 +10,7 @@
10
10
 
11
11
  enum llm_arch {
12
12
  LLM_ARCH_LLAMA,
13
+ LLM_ARCH_LLAMA4,
13
14
  LLM_ARCH_DECI,
14
15
  LLM_ARCH_FALCON,
15
16
  LLM_ARCH_BAICHUAN,
@@ -29,6 +30,8 @@ enum llm_arch {
29
30
  LLM_ARCH_QWEN2,
30
31
  LLM_ARCH_QWEN2MOE,
31
32
  LLM_ARCH_QWEN2VL,
33
+ LLM_ARCH_QWEN3,
34
+ LLM_ARCH_QWEN3MOE,
32
35
  LLM_ARCH_PHI2,
33
36
  LLM_ARCH_PHI3,
34
37
  LLM_ARCH_PHIMOE,
@@ -63,10 +66,14 @@ enum llm_arch {
63
66
  LLM_ARCH_EXAONE,
64
67
  LLM_ARCH_RWKV6,
65
68
  LLM_ARCH_RWKV6QWEN2,
69
+ LLM_ARCH_RWKV7,
70
+ LLM_ARCH_ARWKV7,
66
71
  LLM_ARCH_GRANITE,
67
72
  LLM_ARCH_GRANITE_MOE,
68
73
  LLM_ARCH_CHAMELEON,
69
74
  LLM_ARCH_WAVTOKENIZER_DEC,
75
+ LLM_ARCH_PLM,
76
+ LLM_ARCH_BAILINGMOE,
70
77
  LLM_ARCH_UNKNOWN,
71
78
  };
72
79
 
@@ -75,6 +82,7 @@ enum llm_kv {
75
82
  LLM_KV_GENERAL_ARCHITECTURE,
76
83
  LLM_KV_GENERAL_QUANTIZATION_VERSION,
77
84
  LLM_KV_GENERAL_ALIGNMENT,
85
+ LLM_KV_GENERAL_FILE_TYPE,
78
86
  LLM_KV_GENERAL_NAME,
79
87
  LLM_KV_GENERAL_AUTHOR,
80
88
  LLM_KV_GENERAL_VERSION,
@@ -113,6 +121,7 @@ enum llm_kv {
113
121
  LLM_KV_RESIDUAL_SCALE,
114
122
  LLM_KV_EMBEDDING_SCALE,
115
123
  LLM_KV_TOKEN_SHIFT_COUNT,
124
+ LLM_KV_INTERLEAVE_MOE_LAYER_STEP,
116
125
 
117
126
  LLM_KV_ATTENTION_HEAD_COUNT,
118
127
  LLM_KV_ATTENTION_HEAD_COUNT_KV,
@@ -127,6 +136,10 @@ enum llm_kv {
127
136
  LLM_KV_ATTENTION_CAUSAL,
128
137
  LLM_KV_ATTENTION_Q_LORA_RANK,
129
138
  LLM_KV_ATTENTION_KV_LORA_RANK,
139
+ LLM_KV_ATTENTION_DECAY_LORA_RANK,
140
+ LLM_KV_ATTENTION_ICLR_LORA_RANK,
141
+ LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
142
+ LLM_KV_ATTENTION_GATE_LORA_RANK,
130
143
  LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
131
144
  LLM_KV_ATTENTION_SLIDING_WINDOW,
132
145
  LLM_KV_ATTENTION_SCALE,
@@ -250,8 +263,20 @@ enum llm_tensor {
250
263
  LLM_TENSOR_SSM_A,
251
264
  LLM_TENSOR_SSM_D,
252
265
  LLM_TENSOR_SSM_OUT,
266
+ LLM_TENSOR_TIME_MIX_W0,
253
267
  LLM_TENSOR_TIME_MIX_W1,
254
268
  LLM_TENSOR_TIME_MIX_W2,
269
+ LLM_TENSOR_TIME_MIX_A0,
270
+ LLM_TENSOR_TIME_MIX_A1,
271
+ LLM_TENSOR_TIME_MIX_A2,
272
+ LLM_TENSOR_TIME_MIX_V0,
273
+ LLM_TENSOR_TIME_MIX_V1,
274
+ LLM_TENSOR_TIME_MIX_V2,
275
+ LLM_TENSOR_TIME_MIX_G1,
276
+ LLM_TENSOR_TIME_MIX_G2,
277
+ LLM_TENSOR_TIME_MIX_K_K,
278
+ LLM_TENSOR_TIME_MIX_K_A,
279
+ LLM_TENSOR_TIME_MIX_R_K,
255
280
  LLM_TENSOR_TIME_MIX_LERP_X,
256
281
  LLM_TENSOR_TIME_MIX_LERP_W,
257
282
  LLM_TENSOR_TIME_MIX_LERP_K,
package/cpp/llama-batch.h CHANGED
@@ -42,9 +42,9 @@ struct llama_sbatch {
42
42
  bool logits_all; // TODO: remove once lctx.logits_all is removed too
43
43
 
44
44
  // sorted indices into the batch
45
- std::vector<size_t> ids;
45
+ std::vector<int64_t> ids;
46
46
  // batch indices of the output
47
- std::vector<size_t> out_ids;
47
+ std::vector<int64_t> out_ids;
48
48
  std::vector<llama_sbatch_seq> seq;
49
49
 
50
50
  const llama_batch * batch = nullptr;
@@ -59,6 +59,9 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
59
59
  { "granite", LLM_CHAT_TEMPLATE_GRANITE },
60
60
  { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
61
61
  { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
62
+ { "yandex", LLM_CHAT_TEMPLATE_YANDEX },
63
+ { "bailing", LLM_CHAT_TEMPLATE_BAILING },
64
+ { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
62
65
  };
63
66
 
64
67
  llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -168,6 +171,12 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
168
171
  return LLM_CHAT_TEMPLATE_GIGACHAT;
169
172
  } else if (tmpl_contains("<|role_start|>")) {
170
173
  return LLM_CHAT_TEMPLATE_MEGREZ;
174
+ } else if (tmpl_contains(" Ассистент:")) {
175
+ return LLM_CHAT_TEMPLATE_YANDEX;
176
+ } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
177
+ return LLM_CHAT_TEMPLATE_BAILING;
178
+ } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
179
+ return LLM_CHAT_TEMPLATE_LLAMA4;
171
180
  }
172
181
  return LLM_CHAT_TEMPLATE_UNKNOWN;
173
182
  }
@@ -567,7 +576,51 @@ int32_t llm_chat_apply_template(
567
576
  if (add_ass) {
568
577
  ss << "<|role_start|>assistant<|role_end|>";
569
578
  }
570
- } else {
579
+ } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
580
+ // Yandex template ("\n\n" is defined as EOT token)
581
+
582
+ ss << "<s>";
583
+
584
+ for (size_t i = 0; i < chat.size(); i++) {
585
+ std::string role(chat[i]->role);
586
+ if (role == "user") {
587
+ ss << " Пользователь: " << chat[i]->content << "\n\n";
588
+ } else if (role == "assistant") {
589
+ ss << " Ассистент: " << chat[i]->content << "\n\n";
590
+ }
591
+ }
592
+
593
+ // Add generation prompt if needed
594
+ if (add_ass) {
595
+ ss << " Ассистент:[SEP]";
596
+ }
597
+ } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING) {
598
+ // Bailing (Ling) template
599
+ for (auto message : chat) {
600
+ std::string role(message->role);
601
+
602
+ if (role == "user") {
603
+ role = "HUMAN";
604
+ } else {
605
+ std::transform(role.begin(), role.end(), role.begin(), ::toupper);
606
+ }
607
+
608
+ ss << "<role>" << role << "</role>" << message->content;
609
+ }
610
+
611
+ if (add_ass) {
612
+ ss << "<role>ASSISTANT</role>";
613
+ }
614
+ } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
615
+ // Llama 4
616
+ for (auto message : chat) {
617
+ std::string role(message->role);
618
+ ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
619
+ }
620
+ if (add_ass) {
621
+ ss << "<|header_start|>assistant<|header_end|>\n\n";
622
+ }
623
+ } else {
571
624
  // template not supported
572
625
  return -1;
573
626
  }
@@ -585,4 +638,3 @@ int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
585
638
  }
586
639
  return (int32_t) LLM_CHAT_TEMPLATES.size();
587
640
  }
588
-
package/cpp/llama-chat.h CHANGED
@@ -38,6 +38,9 @@ enum llm_chat_template {
38
38
  LLM_CHAT_TEMPLATE_GRANITE,
39
39
  LLM_CHAT_TEMPLATE_GIGACHAT,
40
40
  LLM_CHAT_TEMPLATE_MEGREZ,
41
+ LLM_CHAT_TEMPLATE_YANDEX,
42
+ LLM_CHAT_TEMPLATE_BAILING,
43
+ LLM_CHAT_TEMPLATE_LLAMA4,
41
44
  LLM_CHAT_TEMPLATE_UNKNOWN,
42
45
  };
43
46