@novastera-oss/llamarn 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319)
  1. package/android/src/main/cpp/include/llama.h +8 -3
  2. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  6. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  7. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  10. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  11. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  13. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  15. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  18. package/cpp/LlamaCppModel.cpp +56 -22
  19. package/cpp/build-info.cpp +2 -2
  20. package/cpp/llama.cpp/CMakeLists.txt +1 -2
  21. package/cpp/llama.cpp/README.md +4 -5
  22. package/cpp/llama.cpp/build-xcframework.sh +1 -1
  23. package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
  24. package/cpp/llama.cpp/common/arg.cpp +24 -0
  25. package/cpp/llama.cpp/common/chat.cpp +37 -20
  26. package/cpp/llama.cpp/common/chat.h +2 -0
  27. package/cpp/llama.cpp/common/common.cpp +3 -0
  28. package/cpp/llama.cpp/common/common.h +5 -0
  29. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
  30. package/cpp/llama.cpp/convert_hf_to_gguf.py +860 -23
  31. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
  32. package/cpp/llama.cpp/ggml/CMakeLists.txt +8 -2
  33. package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
  34. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
  35. package/cpp/llama.cpp/ggml/include/ggml.h +206 -10
  36. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -1
  37. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +37 -3
  43. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +111 -103
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
  60. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1405 -240
  61. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +8 -0
  62. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
  63. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
  64. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +212 -34
  65. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +35 -11
  66. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +187 -54
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +71 -29
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +12 -6
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +269 -110
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
  88. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
  89. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
  90. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
  91. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
  92. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
  93. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
  94. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
  95. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  96. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
  99. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
  100. package/cpp/llama.cpp/ggml/src/ggml-impl.h +125 -183
  101. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +51 -9
  103. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +394 -80
  104. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +616 -239
  105. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +741 -571
  108. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
  112. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
  114. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
  115. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
  116. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
  117. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
  118. package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
  119. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +697 -1098
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
  131. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +104 -62
  132. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  133. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  134. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
  135. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
  136. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
  137. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +39 -38
  138. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
  139. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
  141. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  142. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  144. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
  145. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
  146. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +767 -292
  147. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
  149. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
  150. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
  151. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
  152. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
  153. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
  154. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
  161. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
  162. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
  164. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
  166. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
  167. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
  168. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
  169. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
  170. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
  172. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
  173. package/cpp/llama.cpp/ggml/src/ggml.c +449 -72
  174. package/cpp/llama.cpp/ggml/src/gguf.cpp +13 -2
  175. package/cpp/llama.cpp/gguf-py/gguf/constants.py +285 -0
  176. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +27 -0
  177. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +137 -21
  178. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +109 -7
  179. package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
  180. package/cpp/llama.cpp/include/llama.h +8 -43
  181. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
  182. package/cpp/llama.cpp/src/llama-arch.cpp +265 -3
  183. package/cpp/llama.cpp/src/llama-arch.h +36 -1
  184. package/cpp/llama.cpp/src/llama-batch.cpp +596 -359
  185. package/cpp/llama.cpp/src/llama-batch.h +105 -70
  186. package/cpp/llama.cpp/src/llama-chat.cpp +26 -6
  187. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  188. package/cpp/llama.cpp/src/llama-context.cpp +101 -107
  189. package/cpp/llama.cpp/src/llama-context.h +13 -13
  190. package/cpp/llama.cpp/src/llama-graph.cpp +286 -404
  191. package/cpp/llama.cpp/src/llama-graph.h +78 -79
  192. package/cpp/llama.cpp/src/llama-hparams.cpp +11 -1
  193. package/cpp/llama.cpp/src/llama-hparams.h +11 -0
  194. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +74 -66
  195. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +23 -26
  196. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +312 -157
  197. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +79 -46
  198. package/cpp/llama.cpp/src/llama-kv-cells.h +97 -21
  199. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +73 -69
  200. package/cpp/llama.cpp/src/llama-memory-hybrid.h +19 -22
  201. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +88 -77
  202. package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
  203. package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
  204. package/cpp/llama.cpp/src/llama-memory.h +21 -22
  205. package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
  206. package/cpp/llama.cpp/src/llama-model.cpp +5301 -2922
  207. package/cpp/llama.cpp/src/llama-model.h +40 -0
  208. package/cpp/llama.cpp/src/llama-quant.cpp +88 -5
  209. package/cpp/llama.cpp/src/llama-vocab.cpp +37 -3
  210. package/cpp/llama.cpp/src/llama-vocab.h +42 -0
  211. package/cpp/rn-utils.h +3 -0
  212. package/ios/include/chat.h +2 -0
  213. package/ios/include/common.h +5 -0
  214. package/ios/include/llama.h +8 -43
  215. package/ios/libs/llama.xcframework/Info.plist +19 -19
  216. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  217. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
  218. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  219. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  220. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +206 -10
  221. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -43
  222. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  223. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  224. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  225. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
  226. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  227. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  228. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  229. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  230. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  231. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  232. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  233. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3744
  234. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
  235. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
  236. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +206 -10
  237. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -43
  238. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
  239. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
  240. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +206 -10
  241. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -43
  242. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  243. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
  244. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
  245. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +206 -10
  246. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -43
  247. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  248. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  249. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  250. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
  251. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  252. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  253. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +206 -10
  254. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -43
  255. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  256. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  257. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  258. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
  259. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  260. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  261. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  262. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  263. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  264. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  265. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5095 -4900
  266. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  267. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  268. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +206 -10
  269. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -43
  270. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  271. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  272. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4871
  273. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3773
  274. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  275. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  276. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  277. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  278. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  279. package/package.json +1 -1
  280. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
  281. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  282. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  283. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
  284. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
  285. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
  286. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
  287. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
  288. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
  289. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
  290. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
  291. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
  292. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
  293. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
  294. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
  295. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
  296. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
  297. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
  298. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
  299. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
  300. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
  301. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
  302. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
  303. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
  304. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
  305. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
  306. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
  307. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
  308. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
  309. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
  310. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
  311. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
  312. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
  313. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
  314. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
  315. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
  316. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
  317. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
  318. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
  319. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja
@@ -0,0 +1,124 @@
+ {%- set today = strftime_now("%Y-%m-%d") %}
+ {%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything.
+
+ If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\").
+ You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date.
+ You follow these instructions in all languages, and always respond to the user in the language they use or request.
+ Next sections describe the capabilities that you have.
+
+ # WEB BROWSING INSTRUCTIONS
+
+ You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat.
+
+ # MULTI-MODAL INSTRUCTIONS
+
+ You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos.
+ You cannot read nor transcribe audio files or videos.
+
+ # TOOL CALLING INSTRUCTIONS
+
+ You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations:
+
+ 1. When the request requires up-to-date information.
+ 2. When the request requires specific data that you do not have in your knowledge base.
+ 3. When the request involves actions that you cannot perform without tools.
+
+ Always prioritize using tools to provide the most accurate and helpful response. If tools are not available, inform the user that you cannot perform the requested action at the moment." %}
+
+ {{- bos_token }}
+
+ {%- set system_prompt = default_system_message %}
+ {%- set loop_messages = messages %}
+
+ {%- if not tools is defined %}
+ {%- set tools = none %}
+ {%- endif %}
+
+ {%- if messages|length > 0 and messages[0]['role'] == 'system' %}
+ {%- if messages[0]['content'] is string %}
+ {%- set system_prompt = messages[0]['content'] %}
+ {%- else %}
+ {%- set system_prompt = messages[0]['content'][0]['text'] %}
+ {%- endif %}
+ {%- set loop_messages = messages[1:] %}
+ {%- endif %}
+
+ {%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
+
+ {%- set ns = namespace(index=0) %}
+ {%- for message in loop_messages %}
+ {%- if not (message.role == "tool" or (message.get('tool_calls'))) %}
+ {%- if (message["role"] == "user") != (ns.index % 2 == 0) %}
+ {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
+ {%- endif %}
+ {%- set ns.index = ns.index + 1 %}
+ {%- endif %}
+ {%- endfor %}
+
+ {{- '[SYSTEM_PROMPT]' + system_prompt + '[/SYSTEM_PROMPT]' }}
+
+ {%- for message in loop_messages %}
+ {%- if message['role'] == 'system' %}
+ {%- if message['content'] is string %}
+ {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
+ {%- else %}
+ {{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
+ {%- endif %}
+ {%- elif message['role'] == 'user' %}
+ {%- if tools is not none and (message == user_messages[-1]) %}
+ {{- '[AVAILABLE_TOOLS]' + tools|tojson + '[/AVAILABLE_TOOLS]' }}
+ {%- endif %}
+ {{- '[INST]' }}
+ {%- if message['content'] is string %}
+ {{- message['content'] }}
+ {%- else %}
+ {%- for block in message['content'] %}
+ {%- if block['type'] == 'text' %}
+ {{- block['text'] }}
+ {%- elif block['type'] in ['image', 'image_url'] %}
+ {{- '[IMG]' }}
+ {%- else %}
+ {{- raise_exception('Only text and image blocks are supported in message content!') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {{- '[/INST]' }}
+ {%- elif message['role'] == 'assistant' %}
+ {%- if message.get('tool_calls') %}
+ {%- for tool_call in message.tool_calls %}
+ {{- '[TOOL_CALLS]' + tool_call.function.name }}
+ {%- if not tool_call.id is defined or tool_call.id is not string or tool_call.id|length != 9 %}
+ {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
+ {%- endif %}
+ {{- '[CALL_ID]' + tool_call.id }}
+ {{- '[ARGS]' + tool_call['function']['arguments']|tojson }}
+ {%- endfor %}
+ {{- eos_token }}
+ {%- elif message['content'] is string %}
+ {{- message['content'] + eos_token }}
+ {%- else %}
+ {%- for block in message['content'] %}
+ {%- if block['type'] == 'text' %}
+ {{- block['text'] }}
+ {%- elif block['type'] in ['image', 'image_url'] %}
+ {{- '[IMG]' }}
+ {%- else %}
+ {{- raise_exception('Only text and image blocks are supported in assistant content!') }}
+ {%- endif %}
+ {%- endfor %}
+ {{- eos_token }}
+ {%- endif %}
+ {%- elif message['role'] == 'tool_results' or message['role'] == 'tool' %}
+ {%- if message.content is defined and message.content.content is defined %}
+ {%- set content = message.content.content %}
+ {%- else %}
+ {%- set content = message.content %}
+ {%- endif %}
+ {%- if not message.tool_call_id is defined or message.tool_call_id is not string or message['tool_call_id']|length != 9 %}
+ {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
+ {%- endif %}
+ {{- '[TOOL_RESULTS]' + message.tool_call_id + '[TOOL_CONTENT]' + content|string + '[/TOOL_RESULTS]' }}
+ {%- else %}
+ {{- raise_exception('Only system, user, assistant, and tool roles are supported!') }}
+ {%- endif %}
+ {%- endfor %}
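The new Mistral-Small-3.2 chat template above builds prompts from [SYSTEM_PROMPT], [INST], [AVAILABLE_TOOLS], [TOOL_CALLS]/[CALL_ID]/[ARGS] and [TOOL_RESULTS] markers. As a rough way to preview what it produces, the sketch below renders it with plain Python jinja2; this is illustrative only and not part of the package (llama.cpp applies the template through its own engine, and the file path, token strings and sample messages here are assumptions).

# Illustrative sketch, not part of the diff: preview the prompt the template yields.
from datetime import datetime
from jinja2 import Environment

def raise_exception(message):
    # the template calls raise_exception(...) on malformed conversations
    raise ValueError(message)

env = Environment()
env.globals["strftime_now"] = lambda fmt: datetime.now().strftime(fmt)
env.globals["raise_exception"] = raise_exception

# assumed location inside the unpacked npm package
path = "cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja"
with open(path) as f:
    template = env.from_string(f.read())

prompt = template.render(
    bos_token="<s>",   # token strings are placeholders
    eos_token="</s>",
    messages=[{"role": "user", "content": "Hello!"}],
    tools=None,
)
print(prompt)  # roughly: <s>[SYSTEM_PROMPT]...[/SYSTEM_PROMPT][INST]Hello![/INST]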
package/cpp/llama.cpp/src/llama-arch.cpp
@@ -42,8 +42,12 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
  { LLM_ARCH_GEMMA, "gemma" },
  { LLM_ARCH_GEMMA2, "gemma2" },
  { LLM_ARCH_GEMMA3, "gemma3" },
+ { LLM_ARCH_GEMMA3N, "gemma3n" },
  { LLM_ARCH_STARCODER2, "starcoder2" },
  { LLM_ARCH_MAMBA, "mamba" },
+ { LLM_ARCH_MAMBA2, "mamba2" },
+ { LLM_ARCH_JAMBA, "jamba" },
+ { LLM_ARCH_FALCON_H1, "falcon-h1" },
  { LLM_ARCH_XVERSE, "xverse" },
  { LLM_ARCH_COMMAND_R, "command-r" },
  { LLM_ARCH_COHERE2, "cohere2" },
@@ -69,12 +73,17 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
  { LLM_ARCH_ARWKV7, "arwkv7" },
  { LLM_ARCH_GRANITE, "granite" },
  { LLM_ARCH_GRANITE_MOE, "granitemoe" },
+ { LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
  { LLM_ARCH_CHAMELEON, "chameleon" },
  { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
  { LLM_ARCH_PLM, "plm" },
  { LLM_ARCH_BAILINGMOE, "bailingmoe" },
  { LLM_ARCH_DOTS1, "dots1" },
  { LLM_ARCH_ARCEE, "arcee" },
+ { LLM_ARCH_ERNIE4_5, "ernie4_5" },
+ { LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
+ { LLM_ARCH_SMOLLM3, "smollm3" },
+ { LLM_ARCH_LFM2, "lfm2" },
  { LLM_ARCH_UNKNOWN, "(unknown)" },
  };

@@ -147,7 +156,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
  { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
  { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
  { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
- { LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },

  { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
  { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
@@ -168,6 +176,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
  { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
  { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
  { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
+ { LLM_KV_SSM_GROUP_COUNT, "%s.ssm.group_count" },
  { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },

  { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
@@ -180,6 +189,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {

  { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },

+ { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
+
  { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
  { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
  { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
@@ -198,6 +209,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
  { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
  { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
  { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
+ { LLM_KV_TOKENIZER_ADD_SEP, "tokenizer.ggml.add_sep_token" },
  { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
  { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
  { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
@@ -931,6 +943,42 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
  { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
  },
  },
+ {
+ LLM_ARCH_GEMMA3N,
+ {
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+ { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+ { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+ { LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
+ { LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
+ { LLM_TENSOR_PER_LAYER_PROJ_NORM, "per_layer_proj_norm" },
+ { LLM_TENSOR_ALTUP_UNEMBD_PROJ, "altup_unembd_proj" },
+ { LLM_TENSOR_ALTUP_PROJ, "altup_proj" },
+ { LLM_TENSOR_PER_LAYER_INP_GATE, "blk.%d.inp_gate" },
+ { LLM_TENSOR_PER_LAYER_PROJ, "blk.%d.proj" },
+ { LLM_TENSOR_PER_LAYER_POST_NORM, "blk.%d.post_norm" },
+ { LLM_TENSOR_ALTUP_CORRECT_COEF, "blk.%d.altup_correct_coef" },
+ { LLM_TENSOR_ALTUP_CORRECT_SCALE, "blk.%d.altup_correct_scale" },
+ { LLM_TENSOR_ALTUP_PREDICT_COEF, "blk.%d.altup_predict_coef" },
+ { LLM_TENSOR_ALTUP_ROUTER, "blk.%d.altup_router" },
+ { LLM_TENSOR_ALTUP_ROUTER_NORM, "blk.%d.altup_router_norm" },
+ { LLM_TENSOR_LAUREL_L, "blk.%d.laurel_l" },
+ { LLM_TENSOR_LAUREL_R, "blk.%d.laurel_r" },
+ { LLM_TENSOR_LAUREL_POST_NORM, "blk.%d.laurel_post_norm" },
+ },
+ },
  {
  LLM_ARCH_STARCODER2,
  {
@@ -965,6 +1013,77 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
  { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
  },
  },
+ {
+ LLM_ARCH_MAMBA2,
+ {
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_OUTPUT, "output" },
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+ { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+ { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+ { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+ { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+ { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+ { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+ },
+ },
+ {
+ LLM_ARCH_JAMBA,
+ {
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_OUTPUT, "output" },
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+ { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+ { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
+ { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+ { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
+ { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+ { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
+ { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
+ { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+ { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+ },
+ },
+ {
+ LLM_ARCH_FALCON_H1,
+ {
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_OUTPUT, "output" },
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+ { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+ { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+ { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+ { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+ { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+ { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+ { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+ },
+ },
  {
  LLM_ARCH_XVERSE,
  {
@@ -1525,6 +1644,43 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
  { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
  },
  },
+ {
+ LLM_ARCH_GRANITE_HYBRID,
+ {
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_OUTPUT, "output" },
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ // mamba(2) ssm layers
+ { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+ { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+ { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+ { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+ { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+ { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+ { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+ // attention layers
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+ // dense FFN
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+ // moe FFN
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+ // shared expert
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+ },
+ },
  {
  LLM_ARCH_CHAMELEON,
  {
@@ -1620,6 +1776,84 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
  { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
  }
  },
+ {
+ LLM_ARCH_ERNIE4_5,
+ {
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_OUTPUT, "output" },
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+ },
+ },
+ {
+ LLM_ARCH_HUNYUAN_MOE,
+ {
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_OUTPUT, "output" },
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+ },
+ },
+ {
+ LLM_ARCH_SMOLLM3,
+ {
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_OUTPUT, "output" },
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+ },
+ },
+ {
+ LLM_ARCH_LFM2,
+ {
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+ { LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
+ { LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
+ { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+ }
+ },
  {
  LLM_ARCH_UNKNOWN,
  {
@@ -1704,7 +1938,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
  {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
  {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
  {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
+ {LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_SSM_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
@@ -1748,6 +1986,23 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
  {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
  {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
  {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ // altup / laurel (gemma 3n)
+ {LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_PER_LAYER_MODEL_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_PER_LAYER_PROJ_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
+ {LLM_TENSOR_ALTUP_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ALTUP_UNEMBD_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_PER_LAYER_INP_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_PER_LAYER_PROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_PER_LAYER_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ALTUP_CORRECT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ALTUP_CORRECT_SCALE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ALTUP_PREDICT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ALTUP_ROUTER, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ALTUP_ROUTER_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_LAUREL_L, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_LAUREL_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  // this tensor is loaded for T5, but never used
  {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
  {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, GGML_OP_IM2COL}},
@@ -1766,6 +2021,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
  {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
  {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
  {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
+ {LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
  };

  LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
@@ -1821,6 +2079,7 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
  bool llm_arch_is_recurrent(const llm_arch & arch) {
  switch (arch) {
  case LLM_ARCH_MAMBA:
+ case LLM_ARCH_MAMBA2:
  case LLM_ARCH_RWKV6:
  case LLM_ARCH_RWKV6QWEN2:
  case LLM_ARCH_RWKV7:
@@ -1832,9 +2091,12 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
  }

  bool llm_arch_is_hybrid(const llm_arch & arch) {
- // TODO: There are currently no hybrid models! Once there are, this will be
- // the place to identify them
  switch (arch) {
+ case LLM_ARCH_JAMBA:
+ case LLM_ARCH_FALCON_H1:
+ case LLM_ARCH_GRANITE_HYBRID:
+ case LLM_ARCH_LFM2:
+ return true;
  default:
  return false;
  }
package/cpp/llama.cpp/src/llama-arch.h
@@ -46,8 +46,12 @@ enum llm_arch {
  LLM_ARCH_GEMMA,
  LLM_ARCH_GEMMA2,
  LLM_ARCH_GEMMA3,
+ LLM_ARCH_GEMMA3N,
  LLM_ARCH_STARCODER2,
  LLM_ARCH_MAMBA,
+ LLM_ARCH_MAMBA2,
+ LLM_ARCH_JAMBA,
+ LLM_ARCH_FALCON_H1,
  LLM_ARCH_XVERSE,
  LLM_ARCH_COMMAND_R,
  LLM_ARCH_COHERE2,
@@ -73,12 +77,17 @@ enum llm_arch {
  LLM_ARCH_ARWKV7,
  LLM_ARCH_GRANITE,
  LLM_ARCH_GRANITE_MOE,
+ LLM_ARCH_GRANITE_HYBRID,
  LLM_ARCH_CHAMELEON,
  LLM_ARCH_WAVTOKENIZER_DEC,
  LLM_ARCH_PLM,
  LLM_ARCH_BAILINGMOE,
  LLM_ARCH_DOTS1,
  LLM_ARCH_ARCEE,
+ LLM_ARCH_ERNIE4_5,
+ LLM_ARCH_HUNYUAN_MOE,
+ LLM_ARCH_SMOLLM3,
+ LLM_ARCH_LFM2,
  LLM_ARCH_UNKNOWN,
  };

@@ -151,7 +160,6 @@ enum llm_kv {
  LLM_KV_ATTENTION_SCALE,
  LLM_KV_ATTENTION_KEY_LENGTH_MLA,
  LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
- LLM_KV_ATTENTION_LAYER_INDICES,

  LLM_KV_ROPE_DIMENSION_COUNT,
  LLM_KV_ROPE_DIMENSION_SECTIONS,
@@ -172,6 +180,7 @@ enum llm_kv {
  LLM_KV_SSM_CONV_KERNEL,
  LLM_KV_SSM_STATE_SIZE,
  LLM_KV_SSM_TIME_STEP_RANK,
+ LLM_KV_SSM_GROUP_COUNT,
  LLM_KV_SSM_DT_B_C_RMS,

  LLM_KV_WKV_HEAD_SIZE,
@@ -194,6 +203,7 @@ enum llm_kv {
  LLM_KV_TOKENIZER_MASK_ID,
  LLM_KV_TOKENIZER_ADD_BOS,
  LLM_KV_TOKENIZER_ADD_EOS,
+ LLM_KV_TOKENIZER_ADD_SEP,
  LLM_KV_TOKENIZER_ADD_PREFIX,
  LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
  LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
@@ -218,6 +228,8 @@ enum llm_kv {

  LLM_KV_CLASSIFIER_OUTPUT_LABELS,

+ LLM_KV_SHORTCONV_L_CACHE,
+
  // deprecated:
  LLM_KV_TOKENIZER_PREFIX_ID,
  LLM_KV_TOKENIZER_SUFFIX_ID,
@@ -268,12 +280,32 @@ enum llm_tensor {
  LLM_TENSOR_LAYER_OUT_NORM,
  LLM_TENSOR_POST_ATTN_NORM,
  LLM_TENSOR_POST_MLP_NORM,
+ LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
+ LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
+ LLM_TENSOR_PER_LAYER_INP_GATE, // gemma3n
+ LLM_TENSOR_PER_LAYER_PROJ, // gemma3n
+ LLM_TENSOR_PER_LAYER_PROJ_NORM, // gemma3n
+ LLM_TENSOR_PER_LAYER_POST_NORM, // gemma3n
+ LLM_TENSOR_ALTUP_PROJ, // gemma3n
+ LLM_TENSOR_ALTUP_UNEMBD_PROJ, // gemma3n
+ LLM_TENSOR_ALTUP_CORRECT_COEF, // gemma3n
+ LLM_TENSOR_ALTUP_CORRECT_SCALE, // gemma3n
+ LLM_TENSOR_ALTUP_PREDICT_COEF, // gemma3n
+ LLM_TENSOR_ALTUP_ROUTER, // gemma3n
+ LLM_TENSOR_ALTUP_ROUTER_NORM, // gemma3n
+ LLM_TENSOR_LAUREL_L, // gemma3n
+ LLM_TENSOR_LAUREL_R, // gemma3n
+ LLM_TENSOR_LAUREL_POST_NORM, // gemma3n
  LLM_TENSOR_SSM_IN,
  LLM_TENSOR_SSM_CONV1D,
  LLM_TENSOR_SSM_X,
  LLM_TENSOR_SSM_DT,
+ LLM_TENSOR_SSM_DT_NORM,
  LLM_TENSOR_SSM_A,
+ LLM_TENSOR_SSM_B_NORM,
+ LLM_TENSOR_SSM_C_NORM,
  LLM_TENSOR_SSM_D,
+ LLM_TENSOR_SSM_NORM,
  LLM_TENSOR_SSM_OUT,
  LLM_TENSOR_TIME_MIX_W0,
  LLM_TENSOR_TIME_MIX_W1,
@@ -367,6 +399,9 @@ enum llm_tensor {
  LLM_TENSOR_POS_NET_ATTN_K,
  LLM_TENSOR_POS_NET_ATTN_V,
  LLM_TENSOR_POS_NET_ATTN_OUT,
+ LLM_TENSOR_SHORTCONV_CONV,
+ LLM_TENSOR_SHORTCONV_INPROJ,
+ LLM_TENSOR_SHORTCONV_OUTPROJ,
  };

  enum llm_tensor_layer {