@novastera-oss/llamarn 0.2.9 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314)
  1. package/android/build.gradle +2 -1
  2. package/android/proguard-rules.pro +12 -0
  3. package/android/src/main/cpp/include/llama.h +15 -47
  4. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  13. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  14. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  15. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  20. package/cpp/build-info.cpp +2 -2
  21. package/cpp/llama.cpp/CMakeLists.txt +0 -1
  22. package/cpp/llama.cpp/CMakePresets.json +11 -0
  23. package/cpp/llama.cpp/CODEOWNERS +1 -0
  24. package/cpp/llama.cpp/README.md +8 -8
  25. package/cpp/llama.cpp/build-xcframework.sh +1 -1
  26. package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
  27. package/cpp/llama.cpp/common/arg.cpp +62 -1
  28. package/cpp/llama.cpp/common/chat.cpp +37 -20
  29. package/cpp/llama.cpp/common/chat.h +2 -0
  30. package/cpp/llama.cpp/common/common.cpp +22 -6
  31. package/cpp/llama.cpp/common/common.h +22 -4
  32. package/cpp/llama.cpp/convert_hf_to_gguf.py +1250 -43
  33. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +21 -13
  34. package/cpp/llama.cpp/ggml/CMakeLists.txt +13 -3
  35. package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +85 -47
  36. package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
  37. package/cpp/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
  38. package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
  39. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +1 -1
  40. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +0 -15
  41. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +7 -8
  42. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +44 -38
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
  44. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +126 -8
  45. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +130 -22
  46. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +138 -18
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +11 -3
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1206 -163
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +36 -9
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +3 -3
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +31 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +86 -17
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +225 -0
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +41 -301
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +85 -64
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +47 -60
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +29 -42
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +46 -59
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +36 -45
  72. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +38 -45
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +23 -36
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +3 -13
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +255 -99
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +1 -1
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +111 -3
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +6 -4
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1152 -695
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +92 -5
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +2 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +275 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh +7 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
  88. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
  89. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
  90. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +104 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +13 -0
  92. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
  93. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +27 -6
  94. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +2 -2
  95. package/cpp/llama.cpp/ggml/src/ggml-impl.h +80 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
  97. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +48 -12
  98. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +572 -106
  99. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +599 -105
  100. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +18 -4
  101. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +5 -0
  102. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +800 -42
  103. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
  106. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
  108. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
  109. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
  112. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
  114. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
  115. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
  116. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
  117. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
  118. package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
  119. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +4 -4
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +14 -26
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +191 -55
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +8 -9
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
  131. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +991 -307
  132. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +265 -0
  133. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +59 -12
  134. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
  135. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  136. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
  137. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
  138. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  139. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
  140. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
  141. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
  142. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
  143. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
  144. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +2 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +17 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +3 -8
  152. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
  153. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
  154. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +18 -3
  156. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
  158. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
  159. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
  160. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
  161. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
  163. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
  164. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
  166. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +84 -9
  167. package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +907 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +35 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +56 -0
  173. package/cpp/llama.cpp/ggml/src/ggml.c +386 -67
  174. package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
  175. package/cpp/llama.cpp/gguf-py/gguf/constants.py +307 -0
  176. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
  177. package/cpp/llama.cpp/gguf-py/gguf/metadata.py +4 -0
  178. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +24 -1
  179. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +122 -47
  180. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
  181. package/cpp/llama.cpp/include/llama.h +15 -47
  182. package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +34 -0
  183. package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +43 -0
  184. package/cpp/llama.cpp/requirements/requirements-all.txt +1 -0
  185. package/cpp/llama.cpp/requirements/requirements-server-bench.txt +5 -0
  186. package/cpp/llama.cpp/src/llama-arch.cpp +316 -3
  187. package/cpp/llama.cpp/src/llama-arch.h +23 -1
  188. package/cpp/llama.cpp/src/llama-batch.cpp +103 -71
  189. package/cpp/llama.cpp/src/llama-batch.h +31 -18
  190. package/cpp/llama.cpp/src/llama-chat.cpp +58 -1
  191. package/cpp/llama.cpp/src/llama-chat.h +3 -0
  192. package/cpp/llama.cpp/src/llama-context.cpp +180 -106
  193. package/cpp/llama.cpp/src/llama-context.h +26 -16
  194. package/cpp/llama.cpp/src/llama-cparams.h +3 -2
  195. package/cpp/llama.cpp/src/llama-graph.cpp +310 -211
  196. package/cpp/llama.cpp/src/llama-graph.h +184 -122
  197. package/cpp/llama.cpp/src/llama-hparams.cpp +47 -1
  198. package/cpp/llama.cpp/src/llama-hparams.h +13 -2
  199. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
  200. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
  201. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
  202. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +143 -47
  203. package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
  204. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
  205. package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
  206. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +36 -11
  207. package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
  208. package/cpp/llama.cpp/src/llama-memory.h +3 -0
  209. package/cpp/llama.cpp/src/llama-model.cpp +3545 -719
  210. package/cpp/llama.cpp/src/llama-model.h +21 -4
  211. package/cpp/llama.cpp/src/llama-quant.cpp +2 -2
  212. package/cpp/llama.cpp/src/llama-vocab.cpp +376 -10
  213. package/cpp/llama.cpp/src/llama-vocab.h +43 -0
  214. package/cpp/llama.cpp/src/unicode.cpp +207 -0
  215. package/cpp/llama.cpp/src/unicode.h +2 -0
  216. package/ios/include/chat.h +2 -0
  217. package/ios/include/common.h +22 -4
  218. package/ios/include/llama.h +15 -47
  219. package/ios/libs/llama.xcframework/Info.plist +13 -13
  220. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  221. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
  222. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  223. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
  224. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +15 -47
  225. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  226. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  227. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
  228. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
  229. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  230. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  231. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
  232. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  233. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  234. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
  235. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4016 -3766
  236. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
  237. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
  238. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +15 -47
  239. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
  240. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
  241. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +15 -47
  242. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  243. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
  244. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
  245. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +15 -47
  246. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  247. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  248. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  249. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
  250. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  251. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
  252. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +15 -47
  253. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  254. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  255. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
  256. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
  257. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  258. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  259. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
  260. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  261. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  262. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5303 -4926
  263. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  264. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
  265. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +15 -47
  266. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  267. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  268. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5274 -4897
  269. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4044 -3794
  270. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  271. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  272. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
  273. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  274. package/package.json +4 -4
  275. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
  276. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  277. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  278. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
  279. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
  280. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
  281. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
  282. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
  283. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
  284. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
  285. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
  286. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
  287. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
  288. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
  289. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
  290. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
  291. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
  292. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
  293. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
  294. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
  295. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
  296. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
  297. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
  298. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
  299. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
  300. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
  301. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
  302. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
  303. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
  304. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
  305. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
  306. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
  307. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
  308. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
  309. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
  310. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
  311. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
  312. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
  313. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
  314. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
@@ -34,6 +34,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
34
34
  { LLM_ARCH_PHI3, "phi3" },
35
35
  { LLM_ARCH_PHIMOE, "phimoe" },
36
36
  { LLM_ARCH_PLAMO, "plamo" },
37
+ { LLM_ARCH_PLAMO2, "plamo2" },
37
38
  { LLM_ARCH_CODESHELL, "codeshell" },
38
39
  { LLM_ARCH_ORION, "orion" },
39
40
  { LLM_ARCH_INTERNLM2, "internlm2" },
@@ -45,6 +46,9 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
45
46
  { LLM_ARCH_GEMMA3N, "gemma3n" },
46
47
  { LLM_ARCH_STARCODER2, "starcoder2" },
47
48
  { LLM_ARCH_MAMBA, "mamba" },
49
+ { LLM_ARCH_MAMBA2, "mamba2" },
50
+ { LLM_ARCH_JAMBA, "jamba" },
51
+ { LLM_ARCH_FALCON_H1, "falcon-h1" },
48
52
  { LLM_ARCH_XVERSE, "xverse" },
49
53
  { LLM_ARCH_COMMAND_R, "command-r" },
50
54
  { LLM_ARCH_COHERE2, "cohere2" },
@@ -64,18 +68,26 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
64
68
  { LLM_ARCH_JAIS, "jais" },
65
69
  { LLM_ARCH_NEMOTRON, "nemotron" },
66
70
  { LLM_ARCH_EXAONE, "exaone" },
71
+ { LLM_ARCH_EXAONE4, "exaone4" },
67
72
  { LLM_ARCH_RWKV6, "rwkv6" },
68
73
  { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
69
74
  { LLM_ARCH_RWKV7, "rwkv7" },
70
75
  { LLM_ARCH_ARWKV7, "arwkv7" },
71
76
  { LLM_ARCH_GRANITE, "granite" },
72
77
  { LLM_ARCH_GRANITE_MOE, "granitemoe" },
78
+ { LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
73
79
  { LLM_ARCH_CHAMELEON, "chameleon" },
74
80
  { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
75
81
  { LLM_ARCH_PLM, "plm" },
76
82
  { LLM_ARCH_BAILINGMOE, "bailingmoe" },
77
83
  { LLM_ARCH_DOTS1, "dots1" },
78
84
  { LLM_ARCH_ARCEE, "arcee" },
85
+ { LLM_ARCH_ERNIE4_5, "ernie4_5" },
86
+ { LLM_ARCH_ERNIE4_5_MOE, "ernie4_5-moe" },
87
+ { LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
88
+ { LLM_ARCH_SMOLLM3, "smollm3" },
89
+ { LLM_ARCH_LFM2, "lfm2" },
90
+ { LLM_ARCH_DREAM, "dream" },
79
91
  { LLM_ARCH_UNKNOWN, "(unknown)" },
80
92
  };
81
93
 
@@ -148,7 +160,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
148
160
  { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
149
161
  { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
150
162
  { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
151
- { LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },
152
163
 
153
164
  { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
154
165
  { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
@@ -169,6 +180,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
169
180
  { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
170
181
  { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
171
182
  { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
183
+ { LLM_KV_SSM_GROUP_COUNT, "%s.ssm.group_count" },
172
184
  { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
173
185
 
174
186
  { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
@@ -181,6 +193,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
181
193
 
182
194
  { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
183
195
 
196
+ { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
197
+
184
198
  { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
185
199
  { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
186
200
  { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
@@ -774,6 +788,36 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
774
788
  { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
775
789
  },
776
790
  },
791
+ {
792
+ LLM_ARCH_PLAMO2,
793
+ {
794
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
795
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
796
+ { LLM_TENSOR_OUTPUT, "output" },
797
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
798
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
799
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
800
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
801
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
802
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
803
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
804
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
805
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
806
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
807
+ { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
808
+ { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
809
+ { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
810
+ { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
811
+ { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
812
+ { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
813
+ { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
814
+ { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
815
+ { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
816
+ { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
817
+ { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
818
+ { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
819
+ },
820
+ },
777
821
  {
778
822
  LLM_ARCH_CODESHELL,
779
823
  {
@@ -1003,6 +1047,77 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
1003
1047
  { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1004
1048
  },
1005
1049
  },
1050
+ {
1051
+ LLM_ARCH_MAMBA2,
1052
+ {
1053
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1054
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1055
+ { LLM_TENSOR_OUTPUT, "output" },
1056
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1057
+ { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
1058
+ { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
1059
+ { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
1060
+ { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
1061
+ { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
1062
+ { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
1063
+ { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1064
+ },
1065
+ },
1066
+ {
1067
+ LLM_ARCH_JAMBA,
1068
+ {
1069
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1070
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1071
+ { LLM_TENSOR_OUTPUT, "output" },
1072
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1073
+ { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
1074
+ { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
1075
+ { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
1076
+ { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
1077
+ { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
1078
+ { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
1079
+ { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
1080
+ { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
1081
+ { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
1082
+ { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1083
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1084
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1085
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1086
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1087
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1088
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1089
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1090
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1091
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1092
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1093
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1094
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1095
+ },
1096
+ },
1097
+ {
1098
+ LLM_ARCH_FALCON_H1,
1099
+ {
1100
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1101
+ { LLM_TENSOR_OUTPUT, "output" },
1102
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1103
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1104
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1105
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1106
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1107
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1108
+ { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
1109
+ { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
1110
+ { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
1111
+ { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
1112
+ { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
1113
+ { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
1114
+ { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1115
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1116
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1117
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1118
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1119
+ },
1120
+ },
1006
1121
  {
1007
1122
  LLM_ARCH_XVERSE,
1008
1123
  {
@@ -1396,6 +1511,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
1396
1511
  { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1397
1512
  },
1398
1513
  },
1514
+ {
1515
+ LLM_ARCH_EXAONE4,
1516
+ {
1517
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1518
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1519
+ { LLM_TENSOR_OUTPUT, "output" },
1520
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1521
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1522
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1523
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1524
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1525
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1526
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1527
+ { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
1528
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1529
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1530
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1531
+ { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
1532
+ }
1533
+ },
1399
1534
  {
1400
1535
  LLM_ARCH_RWKV6,
1401
1536
  {
@@ -1563,6 +1698,43 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
1563
1698
  { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1564
1699
  },
1565
1700
  },
1701
+ {
1702
+ LLM_ARCH_GRANITE_HYBRID,
1703
+ {
1704
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1705
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1706
+ { LLM_TENSOR_OUTPUT, "output" },
1707
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1708
+ // mamba(2) ssm layers
1709
+ { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
1710
+ { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
1711
+ { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
1712
+ { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
1713
+ { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
1714
+ { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
1715
+ { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1716
+ // attention layers
1717
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1718
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1719
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1720
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1721
+ // dense FFN
1722
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1723
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1724
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1725
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1726
+ // moe FFN
1727
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1728
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1729
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1730
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1731
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1732
+ // shared expert
1733
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1734
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1735
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1736
+ },
1737
+ },
1566
1738
  {
1567
1739
  LLM_ARCH_CHAMELEON,
1568
1740
  {
@@ -1658,6 +1830,126 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
1658
1830
  { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1659
1831
  }
1660
1832
  },
1833
+ {
1834
+ LLM_ARCH_ERNIE4_5,
1835
+ {
1836
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1837
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1838
+ { LLM_TENSOR_OUTPUT, "output" },
1839
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1840
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1841
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1842
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1843
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1844
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1845
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1846
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1847
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1848
+ },
1849
+ },
1850
+ {
1851
+ LLM_ARCH_ERNIE4_5_MOE,
1852
+ {
1853
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1854
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1855
+ { LLM_TENSOR_OUTPUT, "output" },
1856
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1857
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1858
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1859
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1860
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1861
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1862
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1863
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1864
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1865
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1866
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1867
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1868
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1869
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1870
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1871
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1872
+ { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1873
+ },
1874
+ },
1875
+ {
1876
+ LLM_ARCH_HUNYUAN_MOE,
1877
+ {
1878
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1879
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1880
+ { LLM_TENSOR_OUTPUT, "output" },
1881
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1882
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1883
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1884
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1885
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1886
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1887
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1888
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1889
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1890
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1891
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1892
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1893
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1894
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1895
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1896
+ },
1897
+ },
1898
+ {
1899
+ LLM_ARCH_SMOLLM3,
1900
+ {
1901
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1902
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1903
+ { LLM_TENSOR_OUTPUT, "output" },
1904
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1905
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1906
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1907
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1908
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1909
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1910
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1911
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1912
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1913
+ },
1914
+ },
1915
+ {
1916
+ LLM_ARCH_LFM2,
1917
+ {
1918
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1919
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1920
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1921
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1922
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1923
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1924
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1925
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1926
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1927
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1928
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1929
+ { LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
1930
+ { LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
1931
+ { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
1932
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1933
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1934
+ }
1935
+ },
1936
+ {
1937
+ LLM_ARCH_DREAM,
1938
+ {
1939
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1940
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1941
+ { LLM_TENSOR_OUTPUT, "output" },
1942
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1943
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1944
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1945
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1946
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1947
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1948
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1949
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1950
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1951
+ },
1952
+ },
1661
1953
  {
1662
1954
  LLM_ARCH_UNKNOWN,
1663
1955
  {
@@ -1742,7 +2034,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1742
2034
  {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
1743
2035
  {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
1744
2036
  {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
2037
+ {LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2038
+ {LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2039
+ {LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1745
2040
  {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2041
+ {LLM_TENSOR_SSM_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1746
2042
  {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1747
2043
  {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1748
2044
  {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
@@ -1821,6 +2117,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1821
2117
  {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1822
2118
  {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1823
2119
  {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2120
+ {LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
2121
+ {LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2122
+ {LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1824
2123
  };
1825
2124
 
1826
2125
  LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
@@ -1876,6 +2175,7 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
1876
2175
  bool llm_arch_is_recurrent(const llm_arch & arch) {
1877
2176
  switch (arch) {
1878
2177
  case LLM_ARCH_MAMBA:
2178
+ case LLM_ARCH_MAMBA2:
1879
2179
  case LLM_ARCH_RWKV6:
1880
2180
  case LLM_ARCH_RWKV6QWEN2:
1881
2181
  case LLM_ARCH_RWKV7:
@@ -1887,9 +2187,22 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
1887
2187
  }
1888
2188
 
1889
2189
  bool llm_arch_is_hybrid(const llm_arch & arch) {
1890
- // TODO: There are currently no hybrid models! Once there are, this will be
1891
- // the place to identify them
1892
2190
  switch (arch) {
2191
+ case LLM_ARCH_JAMBA:
2192
+ case LLM_ARCH_FALCON_H1:
2193
+ case LLM_ARCH_PLAMO2:
2194
+ case LLM_ARCH_GRANITE_HYBRID:
2195
+ case LLM_ARCH_LFM2:
2196
+ return true;
2197
+ default:
2198
+ return false;
2199
+ }
2200
+ }
2201
+
2202
+ bool llm_arch_is_diffusion(const llm_arch & arch) {
2203
+ switch (arch) {
2204
+ case LLM_ARCH_DREAM:
2205
+ return true;
1893
2206
  default:
1894
2207
  return false;
1895
2208
  }
@@ -38,6 +38,7 @@ enum llm_arch {
38
38
  LLM_ARCH_PHI3,
39
39
  LLM_ARCH_PHIMOE,
40
40
  LLM_ARCH_PLAMO,
41
+ LLM_ARCH_PLAMO2,
41
42
  LLM_ARCH_CODESHELL,
42
43
  LLM_ARCH_ORION,
43
44
  LLM_ARCH_INTERNLM2,
@@ -49,6 +50,9 @@ enum llm_arch {
49
50
  LLM_ARCH_GEMMA3N,
50
51
  LLM_ARCH_STARCODER2,
51
52
  LLM_ARCH_MAMBA,
53
+ LLM_ARCH_MAMBA2,
54
+ LLM_ARCH_JAMBA,
55
+ LLM_ARCH_FALCON_H1,
52
56
  LLM_ARCH_XVERSE,
53
57
  LLM_ARCH_COMMAND_R,
54
58
  LLM_ARCH_COHERE2,
@@ -68,18 +72,26 @@ enum llm_arch {
68
72
  LLM_ARCH_JAIS,
69
73
  LLM_ARCH_NEMOTRON,
70
74
  LLM_ARCH_EXAONE,
75
+ LLM_ARCH_EXAONE4,
71
76
  LLM_ARCH_RWKV6,
72
77
  LLM_ARCH_RWKV6QWEN2,
73
78
  LLM_ARCH_RWKV7,
74
79
  LLM_ARCH_ARWKV7,
75
80
  LLM_ARCH_GRANITE,
76
81
  LLM_ARCH_GRANITE_MOE,
82
+ LLM_ARCH_GRANITE_HYBRID,
77
83
  LLM_ARCH_CHAMELEON,
78
84
  LLM_ARCH_WAVTOKENIZER_DEC,
79
85
  LLM_ARCH_PLM,
80
86
  LLM_ARCH_BAILINGMOE,
81
87
  LLM_ARCH_DOTS1,
82
88
  LLM_ARCH_ARCEE,
89
+ LLM_ARCH_ERNIE4_5,
90
+ LLM_ARCH_ERNIE4_5_MOE,
91
+ LLM_ARCH_HUNYUAN_MOE,
92
+ LLM_ARCH_SMOLLM3,
93
+ LLM_ARCH_LFM2,
94
+ LLM_ARCH_DREAM,
83
95
  LLM_ARCH_UNKNOWN,
84
96
  };
85
97
 
@@ -152,7 +164,6 @@ enum llm_kv {
152
164
  LLM_KV_ATTENTION_SCALE,
153
165
  LLM_KV_ATTENTION_KEY_LENGTH_MLA,
154
166
  LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
155
- LLM_KV_ATTENTION_LAYER_INDICES,
156
167
 
157
168
  LLM_KV_ROPE_DIMENSION_COUNT,
158
169
  LLM_KV_ROPE_DIMENSION_SECTIONS,
@@ -173,6 +184,7 @@ enum llm_kv {
173
184
  LLM_KV_SSM_CONV_KERNEL,
174
185
  LLM_KV_SSM_STATE_SIZE,
175
186
  LLM_KV_SSM_TIME_STEP_RANK,
187
+ LLM_KV_SSM_GROUP_COUNT,
176
188
  LLM_KV_SSM_DT_B_C_RMS,
177
189
 
178
190
  LLM_KV_WKV_HEAD_SIZE,
@@ -220,6 +232,8 @@ enum llm_kv {
220
232
 
221
233
  LLM_KV_CLASSIFIER_OUTPUT_LABELS,
222
234
 
235
+ LLM_KV_SHORTCONV_L_CACHE,
236
+
223
237
  // deprecated:
224
238
  LLM_KV_TOKENIZER_PREFIX_ID,
225
239
  LLM_KV_TOKENIZER_SUFFIX_ID,
@@ -290,8 +304,12 @@ enum llm_tensor {
290
304
  LLM_TENSOR_SSM_CONV1D,
291
305
  LLM_TENSOR_SSM_X,
292
306
  LLM_TENSOR_SSM_DT,
307
+ LLM_TENSOR_SSM_DT_NORM,
293
308
  LLM_TENSOR_SSM_A,
309
+ LLM_TENSOR_SSM_B_NORM,
310
+ LLM_TENSOR_SSM_C_NORM,
294
311
  LLM_TENSOR_SSM_D,
312
+ LLM_TENSOR_SSM_NORM,
295
313
  LLM_TENSOR_SSM_OUT,
296
314
  LLM_TENSOR_TIME_MIX_W0,
297
315
  LLM_TENSOR_TIME_MIX_W1,
@@ -385,6 +403,9 @@ enum llm_tensor {
385
403
  LLM_TENSOR_POS_NET_ATTN_K,
386
404
  LLM_TENSOR_POS_NET_ATTN_V,
387
405
  LLM_TENSOR_POS_NET_ATTN_OUT,
406
+ LLM_TENSOR_SHORTCONV_CONV,
407
+ LLM_TENSOR_SHORTCONV_INPROJ,
408
+ LLM_TENSOR_SHORTCONV_OUTPROJ,
388
409
  };
389
410
 
390
411
  enum llm_tensor_layer {
@@ -461,3 +482,4 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
461
482
 
462
483
  bool llm_arch_is_recurrent(const llm_arch & arch);
463
484
  bool llm_arch_is_hybrid (const llm_arch & arch);
485
+ bool llm_arch_is_diffusion(const llm_arch & arch);