cui-llama.rn 1.7.3 → 1.7.6

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
  4. package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
@@ -20,6 +20,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
20
20
  { LLM_ARCH_BERT, "bert" },
21
21
  { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
22
22
  { LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" },
23
+ { LLM_ARCH_NEO_BERT, "neo-bert" },
23
24
  { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
24
25
  { LLM_ARCH_BLOOM, "bloom" },
25
26
  { LLM_ARCH_STABLELM, "stablelm" },
@@ -41,6 +42,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
41
42
  { LLM_ARCH_GEMMA, "gemma" },
42
43
  { LLM_ARCH_GEMMA2, "gemma2" },
43
44
  { LLM_ARCH_GEMMA3, "gemma3" },
45
+ { LLM_ARCH_GEMMA3N, "gemma3n" },
44
46
  { LLM_ARCH_STARCODER2, "starcoder2" },
45
47
  { LLM_ARCH_MAMBA, "mamba" },
46
48
  { LLM_ARCH_XVERSE, "xverse" },
@@ -72,6 +74,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
72
74
  { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
73
75
  { LLM_ARCH_PLM, "plm" },
74
76
  { LLM_ARCH_BAILINGMOE, "bailingmoe" },
77
+ { LLM_ARCH_DOTS1, "dots1" },
78
+ { LLM_ARCH_ARCEE, "arcee" },
75
79
  { LLM_ARCH_UNKNOWN, "(unknown)" },
76
80
  };
77
81
 
@@ -144,6 +148,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
144
148
  { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
145
149
  { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
146
150
  { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
151
+ { LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },
147
152
 
148
153
  { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
149
154
  { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
@@ -174,6 +179,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
174
179
  { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
175
180
  { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
176
181
 
182
+ { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
183
+
177
184
  { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
178
185
  { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
179
186
  { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
@@ -192,13 +199,13 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
192
199
  { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
193
200
  { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
194
201
  { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
202
+ { LLM_KV_TOKENIZER_ADD_SEP, "tokenizer.ggml.add_sep_token" },
195
203
  { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
196
204
  { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
197
205
  { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
198
206
  { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
199
207
  { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
200
208
  { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
201
- { LLM_KV_TOKENIZER_CHAT_TEMPLATE_N, "tokenizer.chat_template.%s" },
202
209
  { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
203
210
  { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
204
211
  { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
@@ -242,6 +249,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
242
249
  { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
243
250
  },
244
251
  },
252
+ {
253
+ LLM_ARCH_ARCEE,
254
+ {
255
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
256
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
257
+ { LLM_TENSOR_OUTPUT, "output" },
258
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
259
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
260
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
261
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
262
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
263
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
264
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
265
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
266
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
267
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
268
+ },
269
+ },
245
270
  {
246
271
  LLM_ARCH_LLAMA4,
247
272
  {
@@ -448,6 +473,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
448
473
  { LLM_TENSOR_TOKEN_TYPES, "token_types" },
449
474
  { LLM_TENSOR_POS_EMBD, "position_embd" },
450
475
  { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
476
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
451
477
  { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
452
478
  { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
453
479
  { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
@@ -492,6 +518,21 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
492
518
  { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
493
519
  },
494
520
  },
521
+ {
522
+ LLM_ARCH_NEO_BERT,
523
+ {
524
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
525
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
526
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
527
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
528
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
529
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
530
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
531
+ { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
532
+ { LLM_TENSOR_CLS, "cls" },
533
+ { LLM_TENSOR_CLS_OUT, "cls.output" },
534
+ },
535
+ },
495
536
  {
496
537
  LLM_ARCH_JINA_BERT_V2,
497
538
  {
@@ -892,6 +933,42 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
892
933
  { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
893
934
  },
894
935
  },
936
+ {
937
+ LLM_ARCH_GEMMA3N,
938
+ {
939
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
940
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
941
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
942
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
943
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
944
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
945
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
946
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
947
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
948
+ { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
949
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
950
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
951
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
952
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
953
+ { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
954
+ { LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
955
+ { LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
956
+ { LLM_TENSOR_PER_LAYER_PROJ_NORM, "per_layer_proj_norm" },
957
+ { LLM_TENSOR_ALTUP_UNEMBD_PROJ, "altup_unembd_proj" },
958
+ { LLM_TENSOR_ALTUP_PROJ, "altup_proj" },
959
+ { LLM_TENSOR_PER_LAYER_INP_GATE, "blk.%d.inp_gate" },
960
+ { LLM_TENSOR_PER_LAYER_PROJ, "blk.%d.proj" },
961
+ { LLM_TENSOR_PER_LAYER_POST_NORM, "blk.%d.post_norm" },
962
+ { LLM_TENSOR_ALTUP_CORRECT_COEF, "blk.%d.altup_correct_coef" },
963
+ { LLM_TENSOR_ALTUP_CORRECT_SCALE, "blk.%d.altup_correct_scale" },
964
+ { LLM_TENSOR_ALTUP_PREDICT_COEF, "blk.%d.altup_predict_coef" },
965
+ { LLM_TENSOR_ALTUP_ROUTER, "blk.%d.altup_router" },
966
+ { LLM_TENSOR_ALTUP_ROUTER_NORM, "blk.%d.altup_router_norm" },
967
+ { LLM_TENSOR_LAUREL_L, "blk.%d.laurel_l" },
968
+ { LLM_TENSOR_LAUREL_R, "blk.%d.laurel_r" },
969
+ { LLM_TENSOR_LAUREL_POST_NORM, "blk.%d.laurel_post_norm" },
970
+ },
971
+ },
895
972
  {
896
973
  LLM_ARCH_STARCODER2,
897
974
  {
@@ -1553,6 +1630,34 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
1553
1630
  { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1554
1631
  },
1555
1632
  },
1633
+ {
1634
+ LLM_ARCH_DOTS1,
1635
+ {
1636
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1637
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1638
+ { LLM_TENSOR_OUTPUT, "output" },
1639
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1640
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1641
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1642
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1643
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1644
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1645
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1646
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1647
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1648
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1649
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1650
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1651
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1652
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1653
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1654
+ { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1655
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1656
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1657
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1658
+ { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1659
+ }
1660
+ },
1556
1661
  {
1557
1662
  LLM_ARCH_UNKNOWN,
1558
1663
  {
@@ -1681,6 +1786,23 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1681
1786
  {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1682
1787
  {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1683
1788
  {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1789
+ // altup / laurel (gemma 3n)
1790
+ {LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_GET_ROWS}},
1791
+ {LLM_TENSOR_PER_LAYER_MODEL_PROJ, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1792
+ {LLM_TENSOR_PER_LAYER_PROJ_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
1793
+ {LLM_TENSOR_ALTUP_PROJ, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1794
+ {LLM_TENSOR_ALTUP_UNEMBD_PROJ, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1795
+ {LLM_TENSOR_PER_LAYER_INP_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1796
+ {LLM_TENSOR_PER_LAYER_PROJ, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1797
+ {LLM_TENSOR_PER_LAYER_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1798
+ {LLM_TENSOR_ALTUP_CORRECT_COEF, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1799
+ {LLM_TENSOR_ALTUP_CORRECT_SCALE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1800
+ {LLM_TENSOR_ALTUP_PREDICT_COEF, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1801
+ {LLM_TENSOR_ALTUP_ROUTER, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1802
+ {LLM_TENSOR_ALTUP_ROUTER_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1803
+ {LLM_TENSOR_LAUREL_L, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1804
+ {LLM_TENSOR_LAUREL_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1805
+ {LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1684
1806
  // this tensor is loaded for T5, but never used
1685
1807
  {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_NONE}},
1686
1808
  {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_IM2COL}},
@@ -1704,8 +1826,14 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1704
1826
  LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
1705
1827
 
1706
1828
  std::string LLM_KV::operator()(llm_kv kv) const { // returns the key name for `kv`, built from this object's arch and optional suffix
1707
- return suffix ? ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch), suffix) // removed version: suffix was passed to ::format as an extra argument
1708
- : ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
1829
+ std::string name = ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch)); // added version: format the key template with the arch name only
1830
+
1831
+ if (suffix != nullptr) { // suffix is optional; a null suffix leaves the formatted name unchanged
1832
+ name += "."; // separator between the formatted name and the suffix
1833
+ name += suffix; // suffix is appended verbatim, it no longer goes through ::format
1834
+ }
1835
+
1836
+ return name;
1709
1837
  }
1710
1838
 
1711
1839
  std::string LLM_TN_IMPL::str() const {
@@ -1744,3 +1872,25 @@ llm_arch llm_arch_from_string(const std::string & name) {
1744
1872
  const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
1745
1873
  return LLM_TENSOR_INFOS.at(tensor);
1746
1874
  }
1875
+
1876
+ bool llm_arch_is_recurrent(const llm_arch & arch) { // returns true only for the architectures enumerated in the switch below
1877
+ switch (arch) {
1878
+ case LLM_ARCH_MAMBA:
1879
+ case LLM_ARCH_RWKV6:
1880
+ case LLM_ARCH_RWKV6QWEN2:
1881
+ case LLM_ARCH_RWKV7:
1882
+ case LLM_ARCH_ARWKV7:
1883
+ return true; // all five case labels above fall through to this return
1884
+ default:
1885
+ return false; // every other llm_arch value
1886
+ }
1887
+ }
1888
+
1889
+ bool llm_arch_is_hybrid(const llm_arch & arch) { // currently returns false for every arch value
1890
+ // TODO: There are currently no hybrid models! Once there are, this will be
1891
+ // the place to identify them
1892
+ switch (arch) { // placeholder switch: it has no case labels yet, only the default
1893
+ default:
1894
+ return false;
1895
+ }
1896
+ }
package/cpp/llama-arch.h CHANGED
@@ -24,6 +24,7 @@ enum llm_arch {
24
24
  LLM_ARCH_BERT,
25
25
  LLM_ARCH_NOMIC_BERT,
26
26
  LLM_ARCH_NOMIC_BERT_MOE,
27
+ LLM_ARCH_NEO_BERT,
27
28
  LLM_ARCH_JINA_BERT_V2,
28
29
  LLM_ARCH_BLOOM,
29
30
  LLM_ARCH_STABLELM,
@@ -45,6 +46,7 @@ enum llm_arch {
45
46
  LLM_ARCH_GEMMA,
46
47
  LLM_ARCH_GEMMA2,
47
48
  LLM_ARCH_GEMMA3,
49
+ LLM_ARCH_GEMMA3N,
48
50
  LLM_ARCH_STARCODER2,
49
51
  LLM_ARCH_MAMBA,
50
52
  LLM_ARCH_XVERSE,
@@ -76,6 +78,8 @@ enum llm_arch {
76
78
  LLM_ARCH_WAVTOKENIZER_DEC,
77
79
  LLM_ARCH_PLM,
78
80
  LLM_ARCH_BAILINGMOE,
81
+ LLM_ARCH_DOTS1,
82
+ LLM_ARCH_ARCEE,
79
83
  LLM_ARCH_UNKNOWN,
80
84
  };
81
85
 
@@ -148,6 +152,7 @@ enum llm_kv {
148
152
  LLM_KV_ATTENTION_SCALE,
149
153
  LLM_KV_ATTENTION_KEY_LENGTH_MLA,
150
154
  LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
155
+ LLM_KV_ATTENTION_LAYER_INDICES,
151
156
 
152
157
  LLM_KV_ROPE_DIMENSION_COUNT,
153
158
  LLM_KV_ROPE_DIMENSION_SECTIONS,
@@ -190,13 +195,13 @@ enum llm_kv {
190
195
  LLM_KV_TOKENIZER_MASK_ID,
191
196
  LLM_KV_TOKENIZER_ADD_BOS,
192
197
  LLM_KV_TOKENIZER_ADD_EOS,
198
+ LLM_KV_TOKENIZER_ADD_SEP,
193
199
  LLM_KV_TOKENIZER_ADD_PREFIX,
194
200
  LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
195
201
  LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
196
202
  LLM_KV_TOKENIZER_HF_JSON,
197
203
  LLM_KV_TOKENIZER_RWKV,
198
204
  LLM_KV_TOKENIZER_CHAT_TEMPLATE,
199
- LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
200
205
  LLM_KV_TOKENIZER_FIM_PRE_ID,
201
206
  LLM_KV_TOKENIZER_FIM_SUF_ID,
202
207
  LLM_KV_TOKENIZER_FIM_MID_ID,
@@ -213,6 +218,8 @@ enum llm_kv {
213
218
  LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
214
219
  LLM_KV_CONVNEXT_BLOCK_COUNT,
215
220
 
221
+ LLM_KV_CLASSIFIER_OUTPUT_LABELS,
222
+
216
223
  // deprecated:
217
224
  LLM_KV_TOKENIZER_PREFIX_ID,
218
225
  LLM_KV_TOKENIZER_SUFFIX_ID,
@@ -263,6 +270,22 @@ enum llm_tensor {
263
270
  LLM_TENSOR_LAYER_OUT_NORM,
264
271
  LLM_TENSOR_POST_ATTN_NORM,
265
272
  LLM_TENSOR_POST_MLP_NORM,
273
+ LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
274
+ LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
275
+ LLM_TENSOR_PER_LAYER_INP_GATE, // gemma3n
276
+ LLM_TENSOR_PER_LAYER_PROJ, // gemma3n
277
+ LLM_TENSOR_PER_LAYER_PROJ_NORM, // gemma3n
278
+ LLM_TENSOR_PER_LAYER_POST_NORM, // gemma3n
279
+ LLM_TENSOR_ALTUP_PROJ, // gemma3n
280
+ LLM_TENSOR_ALTUP_UNEMBD_PROJ, // gemma3n
281
+ LLM_TENSOR_ALTUP_CORRECT_COEF, // gemma3n
282
+ LLM_TENSOR_ALTUP_CORRECT_SCALE, // gemma3n
283
+ LLM_TENSOR_ALTUP_PREDICT_COEF, // gemma3n
284
+ LLM_TENSOR_ALTUP_ROUTER, // gemma3n
285
+ LLM_TENSOR_ALTUP_ROUTER_NORM, // gemma3n
286
+ LLM_TENSOR_LAUREL_L, // gemma3n
287
+ LLM_TENSOR_LAUREL_R, // gemma3n
288
+ LLM_TENSOR_LAUREL_POST_NORM, // gemma3n
266
289
  LLM_TENSOR_SSM_IN,
267
290
  LLM_TENSOR_SSM_CONV1D,
268
291
  LLM_TENSOR_SSM_X,
@@ -435,3 +458,6 @@ const char * llm_arch_name(llm_arch arch);
435
458
  llm_arch llm_arch_from_string(const std::string & name);
436
459
 
437
460
  const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
461
+
462
+ bool llm_arch_is_recurrent(const llm_arch & arch); // true for the archs listed in its definition's switch (MAMBA, RWKV6, RWKV6QWEN2, RWKV7, ARWKV7)
463
+ bool llm_arch_is_hybrid (const llm_arch & arch); // the definition currently returns false for every arch