@novastera-oss/llamarn 0.2.7 → 0.3.0

This diff represents the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (319)
  1. package/android/src/main/cpp/include/llama.h +8 -3
  2. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  6. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  7. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  10. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  11. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  13. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  15. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  18. package/cpp/LlamaCppModel.cpp +56 -22
  19. package/cpp/build-info.cpp +2 -2
  20. package/cpp/llama.cpp/CMakeLists.txt +1 -2
  21. package/cpp/llama.cpp/README.md +4 -5
  22. package/cpp/llama.cpp/build-xcframework.sh +1 -1
  23. package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
  24. package/cpp/llama.cpp/common/arg.cpp +24 -0
  25. package/cpp/llama.cpp/common/chat.cpp +37 -20
  26. package/cpp/llama.cpp/common/chat.h +2 -0
  27. package/cpp/llama.cpp/common/common.cpp +3 -0
  28. package/cpp/llama.cpp/common/common.h +5 -0
  29. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
  30. package/cpp/llama.cpp/convert_hf_to_gguf.py +860 -23
  31. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
  32. package/cpp/llama.cpp/ggml/CMakeLists.txt +8 -2
  33. package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
  34. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
  35. package/cpp/llama.cpp/ggml/include/ggml.h +206 -10
  36. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -1
  37. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +37 -3
  43. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +111 -103
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
  60. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1405 -240
  61. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +8 -0
  62. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
  63. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
  64. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +212 -34
  65. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +35 -11
  66. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +187 -54
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +71 -29
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +12 -6
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +269 -110
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
  88. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
  89. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
  90. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
  91. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
  92. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
  93. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
  94. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
  95. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  96. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
  99. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
  100. package/cpp/llama.cpp/ggml/src/ggml-impl.h +125 -183
  101. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +51 -9
  103. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +394 -80
  104. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +616 -239
  105. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +741 -571
  108. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
  112. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
  114. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
  115. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
  116. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
  117. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
  118. package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
  119. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +697 -1098
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
  131. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +104 -62
  132. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  133. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  134. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
  135. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
  136. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
  137. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +39 -38
  138. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
  139. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
  141. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  142. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  144. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
  145. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
  146. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +767 -292
  147. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
  149. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
  150. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
  151. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
  152. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
  153. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
  154. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
  161. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
  162. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
  164. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
  166. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
  167. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
  168. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
  169. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
  170. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
  172. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
  173. package/cpp/llama.cpp/ggml/src/ggml.c +449 -72
  174. package/cpp/llama.cpp/ggml/src/gguf.cpp +13 -2
  175. package/cpp/llama.cpp/gguf-py/gguf/constants.py +285 -0
  176. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +27 -0
  177. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +137 -21
  178. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +109 -7
  179. package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
  180. package/cpp/llama.cpp/include/llama.h +8 -43
  181. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
  182. package/cpp/llama.cpp/src/llama-arch.cpp +265 -3
  183. package/cpp/llama.cpp/src/llama-arch.h +36 -1
  184. package/cpp/llama.cpp/src/llama-batch.cpp +596 -359
  185. package/cpp/llama.cpp/src/llama-batch.h +105 -70
  186. package/cpp/llama.cpp/src/llama-chat.cpp +26 -6
  187. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  188. package/cpp/llama.cpp/src/llama-context.cpp +101 -107
  189. package/cpp/llama.cpp/src/llama-context.h +13 -13
  190. package/cpp/llama.cpp/src/llama-graph.cpp +286 -404
  191. package/cpp/llama.cpp/src/llama-graph.h +78 -79
  192. package/cpp/llama.cpp/src/llama-hparams.cpp +11 -1
  193. package/cpp/llama.cpp/src/llama-hparams.h +11 -0
  194. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +74 -66
  195. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +23 -26
  196. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +312 -157
  197. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +79 -46
  198. package/cpp/llama.cpp/src/llama-kv-cells.h +97 -21
  199. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +73 -69
  200. package/cpp/llama.cpp/src/llama-memory-hybrid.h +19 -22
  201. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +88 -77
  202. package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
  203. package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
  204. package/cpp/llama.cpp/src/llama-memory.h +21 -22
  205. package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
  206. package/cpp/llama.cpp/src/llama-model.cpp +5301 -2922
  207. package/cpp/llama.cpp/src/llama-model.h +40 -0
  208. package/cpp/llama.cpp/src/llama-quant.cpp +88 -5
  209. package/cpp/llama.cpp/src/llama-vocab.cpp +37 -3
  210. package/cpp/llama.cpp/src/llama-vocab.h +42 -0
  211. package/cpp/rn-utils.h +3 -0
  212. package/ios/include/chat.h +2 -0
  213. package/ios/include/common.h +5 -0
  214. package/ios/include/llama.h +8 -43
  215. package/ios/libs/llama.xcframework/Info.plist +19 -19
  216. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  217. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
  218. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  219. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  220. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +206 -10
  221. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -43
  222. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  223. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  224. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  225. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
  226. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  227. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  228. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  229. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  230. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  231. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  232. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  233. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3744
  234. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
  235. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
  236. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +206 -10
  237. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -43
  238. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
  239. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
  240. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +206 -10
  241. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -43
  242. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  243. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
  244. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
  245. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +206 -10
  246. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -43
  247. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  248. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  249. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  250. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
  251. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  252. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  253. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +206 -10
  254. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -43
  255. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  256. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  257. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  258. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
  259. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  260. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  261. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  262. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  263. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  264. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  265. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5095 -4900
  266. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  267. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  268. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +206 -10
  269. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -43
  270. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  271. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  272. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4871
  273. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3773
  274. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  275. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  276. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  277. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  278. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  279. package/package.json +1 -1
  280. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
  281. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  282. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  283. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
  284. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
  285. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
  286. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
  287. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
  288. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
  289. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
  290. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
  291. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
  292. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
  293. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
  294. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
  295. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
  296. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
  297. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
  298. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
  299. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
  300. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
  301. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
  302. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
  303. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
  304. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
  305. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
  306. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
  307. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
  308. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
  309. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
  310. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
  311. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
  312. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
  313. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
  314. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
  315. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
  316. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
  317. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
  318. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
  319. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
package/cpp/llama.cpp/ggml/src/gguf.cpp
@@ -335,7 +335,11 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
 
        for (uint32_t i = 0; i < magic.size(); i++) {
            if (magic[i] != GGUF_MAGIC[i]) {
-               GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
+               char c0 = isprint(magic[0]) ? magic[0] : '?';
+               char c1 = isprint(magic[1]) ? magic[1] : '?';
+               char c2 = isprint(magic[2]) ? magic[2] : '?';
+               char c3 = isprint(magic[3]) ? magic[3] : '?';
+               GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3);
                gguf_free(ctx);
                return nullptr;
            }
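The hunk above hardens the GGUF magic check: when a file does not begin with 'GGUF', the raw header bytes are now passed through isprint() and masked with '?' before they reach the error log, so binary garbage cannot corrupt log output. A minimal Python sketch of the same validation follows; the helper name and the model.gguf path are illustrative, not part of the package:

def check_gguf_magic(path: str) -> None:
    # GGUF files must begin with the 4-byte magic 'GGUF'.
    with open(path, "rb") as f:
        magic = f.read(4)
    if magic != b"GGUF":
        # Mirror the C++ fix: keep printable ASCII bytes, mask the rest as '?'.
        shown = "".join(chr(b) if 0x20 <= b <= 0x7e else "?" for b in magic)
        raise ValueError(f"invalid magic characters: '{shown}', expected 'GGUF'")

# check_gguf_magic("model.gguf")  # raises ValueError on a non-GGUF file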
@@ -627,7 +631,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
                gguf_free(ctx);
                return nullptr;
            }
-           ctx->size += GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
+           size_t padded_size = GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
+           if (SIZE_MAX - ctx->size < padded_size) {
+               GGML_LOG_ERROR("%s: tensor '%s' size overflow, cannot accumulate size %zu + %zu\n",
+                       __func__, ti.t.name, ctx->size, padded_size);
+               gguf_free(ctx);
+               return nullptr;
+           }
+           ctx->size += padded_size;
        }
    }
 
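This second hunk guards the accumulation of padded tensor sizes against size_t wrap-around: before adding a tensor's padded size, it checks that SIZE_MAX - ctx->size still has room. A sketch of the arithmetic in Python (Python integers cannot overflow, so SIZE_MAX is modeled explicitly; the function names are illustrative):

SIZE_MAX = 2**64 - 1  # assumed width of size_t on a 64-bit target

def ggml_pad(n: int, align: int) -> int:
    # Round n up to the next multiple of align, as the GGML_PAD macro does.
    return (n + align - 1) // align * align

def total_data_size(tensor_nbytes: list[int], alignment: int) -> int:
    total = 0
    for nbytes in tensor_nbytes:
        padded = ggml_pad(nbytes, alignment)
        if SIZE_MAX - total < padded:  # the new overflow guard
            raise OverflowError(f"cannot accumulate size {total} + {padded}")
        total += padded
    return total

print(total_data_size([100, 4096, 7], alignment=32))  # -> 4256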
package/cpp/llama.cpp/gguf-py/gguf/constants.py
@@ -118,6 +118,10 @@ class Keys:
         EMBEDDING_SCALE = "{arch}.embedding_scale"
         TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"
         INTERLEAVE_MOE_LAYER_STEP = "{arch}.interleave_moe_layer_step"
+        ACTIVATION_SPARSITY_SCALE = "{arch}.activation_sparsity_scale"
+        ALTUP_ACTIVE_IDX = "{arch}.altup.active_idx"
+        ALTUP_NUM_INPUTS = "{arch}.altup.num_inputs"
+        EMBD_LENGTH_PER_LAYER_INP = "{arch}.embedding_length_per_layer_input"
 
     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"
@@ -142,6 +146,8 @@ class Keys:
         SCALE = "{arch}.attention.scale"
         KEY_LENGTH_MLA = "{arch}.attention.key_length_mla"
         VALUE_LENGTH_MLA = "{arch}.attention.value_length_mla"
+        SHARED_KV_LAYERS = "{arch}.attention.shared_kv_layers"
+        SLIDING_WINDOW_PATTERN = "{arch}.attention.sliding_window_pattern"
 
     class Rope:
         DIMENSION_COUNT = "{arch}.rope.dimension_count"
@@ -164,6 +170,7 @@ class Keys:
         INNER_SIZE = "{arch}.ssm.inner_size"
         STATE_SIZE = "{arch}.ssm.state_size"
         TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
+        GROUP_COUNT = "{arch}.ssm.group_count"
         DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"
 
     class WKV:
@@ -180,6 +187,9 @@ class Keys:
     class Classifier:
         OUTPUT_LABELS = "{arch}.classifier.output_labels"
 
+    class ShortConv:
+        L_CACHE = "{arch}.shortconv.l_cache"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE = "tokenizer.ggml.pre"
@@ -198,6 +208,7 @@ class Keys:
         MASK_ID = "tokenizer.ggml.mask_token_id"
         ADD_BOS = "tokenizer.ggml.add_bos_token"
         ADD_EOS = "tokenizer.ggml.add_eos_token"
+        ADD_SEP = "tokenizer.ggml.add_sep_token"
         ADD_PREFIX = "tokenizer.ggml.add_space_prefix"
         REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces"
         PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
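All of the new metadata keys above follow the existing convention: per-architecture keys embed an "{arch}" placeholder that is substituted when the key is written, while tokenizer keys are global strings with no placeholder. A quick illustration (the "mamba2" arch string is only an example):

GROUP_COUNT = "{arch}.ssm.group_count"    # per-architecture template
ADD_SEP = "tokenizer.ggml.add_sep_token"  # global tokenizer key

print(GROUP_COUNT.format(arch="mamba2"))  # -> mamba2.ssm.group_count
print(ADD_SEP)                            # -> tokenizer.ggml.add_sep_token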
@@ -280,6 +291,7 @@ class MODEL_ARCH(IntEnum):
     LLAMA4 = auto()
     DECI = auto()
     FALCON = auto()
+    FALCON_H1 = auto()
     BAICHUAN = auto()
     GROK = auto()
     GPT2 = auto()
@@ -313,12 +325,15 @@ class MODEL_ARCH(IntEnum):
     GEMMA = auto()
     GEMMA2 = auto()
     GEMMA3 = auto()
+    GEMMA3N = auto()
     STARCODER2 = auto()
     RWKV6 = auto()
     RWKV6QWEN2 = auto()
     RWKV7 = auto()
     ARWKV7 = auto()
     MAMBA = auto()
+    MAMBA2 = auto()
+    JAMBA = auto()
     XVERSE = auto()
     COMMAND_R = auto()
     COHERE2 = auto()
@@ -340,12 +355,17 @@ class MODEL_ARCH(IntEnum):
     EXAONE = auto()
     GRANITE = auto()
     GRANITE_MOE = auto()
+    GRANITE_HYBRID = auto()
     CHAMELEON = auto()
     WAVTOKENIZER_DEC = auto()
     PLM = auto()
     BAILINGMOE = auto()
     DOTS1 = auto()
     ARCEE = auto()
+    ERNIE4_5 = auto()
+    HUNYUAN_MOE = auto()
+    SMOLLM3 = auto()
+    LFM2 = auto()
 
 
 class VISION_PROJECTOR_TYPE(IntEnum):
@@ -398,12 +418,32 @@ class MODEL_TENSOR(IntEnum):
     ATTN_Q_NORM = auto()
     ATTN_K_NORM = auto()
     LAYER_OUT_NORM = auto()
+    PER_LAYER_TOKEN_EMBD = auto()  # gemma3n
+    PER_LAYER_MODEL_PROJ = auto()  # gemma3n
+    PER_LAYER_INP_GATE = auto()  # gemma3n
+    PER_LAYER_PROJ = auto()  # gemma3n
+    PER_LAYER_PROJ_NORM = auto()  # gemma3n
+    PER_LAYER_POST_NORM = auto()  # gemma3n
+    ALTUP_PROJ = auto()  # gemma3n
+    ALTUP_UNEMBD_PROJ = auto()  # gemma3n
+    ALTUP_CORRECT_COEF = auto()  # gemma3n
+    ALTUP_CORRECT_SCALE = auto()  # gemma3n
+    ALTUP_PREDICT_COEF = auto()  # gemma3n
+    ALTUP_ROUTER = auto()  # gemma3n
+    ALTUP_ROUTER_NORM = auto()  # gemma3n
+    LAUREL_L = auto()  # gemma3n
+    LAUREL_R = auto()  # gemma3n
+    LAUREL_POST_NORM = auto()  # gemma3n
     SSM_IN = auto()
     SSM_CONV1D = auto()
     SSM_X = auto()
     SSM_DT = auto()
+    SSM_DT_NORM = auto()
     SSM_A = auto()
+    SSM_B_NORM = auto()
+    SSM_C_NORM = auto()
     SSM_D = auto()
+    SSM_NORM = auto()
     SSM_OUT = auto()
     TIME_MIX_W0 = auto()
     TIME_MIX_W1 = auto()
@@ -497,6 +537,9 @@ class MODEL_TENSOR(IntEnum):
     POSNET_ATTN_K = auto()
     POSNET_ATTN_V = auto()
     POSNET_ATTN_OUT = auto()
+    SHORTCONV_CONV = auto()
+    SHORTCONV_INPROJ = auto()
+    SHORTCONV_OUTPROJ = auto()
     # vision
     V_MMPROJ = auto()
     V_MMPROJ_FC = auto()
@@ -596,12 +639,15 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.GEMMA: "gemma",
     MODEL_ARCH.GEMMA2: "gemma2",
     MODEL_ARCH.GEMMA3: "gemma3",
+    MODEL_ARCH.GEMMA3N: "gemma3n",
     MODEL_ARCH.STARCODER2: "starcoder2",
     MODEL_ARCH.RWKV6: "rwkv6",
     MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
     MODEL_ARCH.RWKV7: "rwkv7",
     MODEL_ARCH.ARWKV7: "arwkv7",
     MODEL_ARCH.MAMBA: "mamba",
+    MODEL_ARCH.MAMBA2: "mamba2",
+    MODEL_ARCH.JAMBA: "jamba",
     MODEL_ARCH.XVERSE: "xverse",
     MODEL_ARCH.COMMAND_R: "command-r",
     MODEL_ARCH.COHERE2: "cohere2",
@@ -623,12 +669,18 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.EXAONE: "exaone",
     MODEL_ARCH.GRANITE: "granite",
     MODEL_ARCH.GRANITE_MOE: "granitemoe",
+    MODEL_ARCH.GRANITE_HYBRID: "granitehybrid",
     MODEL_ARCH.CHAMELEON: "chameleon",
     MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
     MODEL_ARCH.PLM: "plm",
     MODEL_ARCH.BAILINGMOE: "bailingmoe",
     MODEL_ARCH.DOTS1: "dots1",
     MODEL_ARCH.ARCEE: "arcee",
+    MODEL_ARCH.ERNIE4_5: "ernie4_5",
+    MODEL_ARCH.FALCON_H1: "falcon-h1",
+    MODEL_ARCH.HUNYUAN_MOE: "hunyuan-moe",
+    MODEL_ARCH.SMOLLM3: "smollm3",
+    MODEL_ARCH.LFM2: "lfm2",
 }
 
 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -681,12 +733,32 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
     MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
+    MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: "per_layer_token_embd",  # gemma3n
+    MODEL_TENSOR.PER_LAYER_MODEL_PROJ: "per_layer_model_proj",  # gemma3n
+    MODEL_TENSOR.PER_LAYER_PROJ_NORM: "per_layer_proj_norm",  # gemma3n
+    MODEL_TENSOR.ALTUP_UNEMBD_PROJ: "altup_unembd_proj",  # gemma3n
+    MODEL_TENSOR.ALTUP_PROJ: "altup_proj",  # gemma3n
+    MODEL_TENSOR.PER_LAYER_INP_GATE: "blk.{bid}.inp_gate",  # gemma3n
+    MODEL_TENSOR.PER_LAYER_PROJ: "blk.{bid}.proj",  # gemma3n
+    MODEL_TENSOR.PER_LAYER_POST_NORM: "blk.{bid}.post_norm",  # gemma3n
+    MODEL_TENSOR.ALTUP_CORRECT_COEF: "blk.{bid}.altup_correct_coef",  # gemma3n
+    MODEL_TENSOR.ALTUP_CORRECT_SCALE: "blk.{bid}.altup_correct_scale",  # gemma3n
+    MODEL_TENSOR.ALTUP_PREDICT_COEF: "blk.{bid}.altup_predict_coef",  # gemma3n
+    MODEL_TENSOR.ALTUP_ROUTER: "blk.{bid}.altup_router",  # gemma3n
+    MODEL_TENSOR.ALTUP_ROUTER_NORM: "blk.{bid}.altup_router_norm",  # gemma3n
+    MODEL_TENSOR.LAUREL_L: "blk.{bid}.laurel_l",  # gemma3n
+    MODEL_TENSOR.LAUREL_R: "blk.{bid}.laurel_r",  # gemma3n
+    MODEL_TENSOR.LAUREL_POST_NORM: "blk.{bid}.laurel_post_norm",  # gemma3n
     MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
     MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
+    MODEL_TENSOR.SSM_DT_NORM: "blk.{bid}.ssm_dt_norm",
     MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
+    MODEL_TENSOR.SSM_B_NORM: "blk.{bid}.ssm_b_norm",
+    MODEL_TENSOR.SSM_C_NORM: "blk.{bid}.ssm_c_norm",
     MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
+    MODEL_TENSOR.SSM_NORM: "blk.{bid}.ssm_norm",
     MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
     MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0",
     MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
@@ -780,6 +852,9 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
     MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
     MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
+    MODEL_TENSOR.SHORTCONV_CONV: "blk.{bid}.shortconv.conv",
+    MODEL_TENSOR.SHORTCONV_INPROJ: "blk.{bid}.shortconv.in_proj",
+    MODEL_TENSOR.SHORTCONV_OUTPROJ: "blk.{bid}.shortconv.out_proj",
     # vision
     MODEL_TENSOR.V_MMPROJ: "mm.{bid}",
     MODEL_TENSOR.V_MMPROJ_FC: "mm.model.fc",
@@ -1485,6 +1560,41 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_PRE_NORM,
         MODEL_TENSOR.FFN_POST_NORM,
     ],
+    MODEL_ARCH.GEMMA3N: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.FFN_PRE_NORM,
+        MODEL_TENSOR.FFN_POST_NORM,
+        # altup / laurel
+        MODEL_TENSOR.PER_LAYER_TOKEN_EMBD,
+        MODEL_TENSOR.PER_LAYER_MODEL_PROJ,
+        MODEL_TENSOR.PER_LAYER_INP_GATE,
+        MODEL_TENSOR.PER_LAYER_PROJ,
+        MODEL_TENSOR.PER_LAYER_PROJ_NORM,
+        MODEL_TENSOR.PER_LAYER_POST_NORM,
+        MODEL_TENSOR.ALTUP_PROJ,
+        MODEL_TENSOR.ALTUP_UNEMBD_PROJ,
+        MODEL_TENSOR.ALTUP_CORRECT_COEF,
+        MODEL_TENSOR.ALTUP_CORRECT_SCALE,
+        MODEL_TENSOR.ALTUP_PREDICT_COEF,
+        MODEL_TENSOR.ALTUP_ROUTER,
+        MODEL_TENSOR.ALTUP_ROUTER_NORM,
+        MODEL_TENSOR.LAUREL_L,
+        MODEL_TENSOR.LAUREL_R,
+        MODEL_TENSOR.LAUREL_POST_NORM,
+    ],
     MODEL_ARCH.STARCODER2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
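MODEL_TENSORS maps each architecture to the tensor kinds it may contain, and TENSOR_NAMES maps each kind to its concrete GGUF name template, so together the two tables enumerate the tensor names a converted checkpoint can carry. A sketch of that expansion, assuming the gguf-py package from this release is importable (block index 0 stands in for any block):

from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

for tensor in MODEL_TENSORS[MODEL_ARCH.GEMMA3N]:
    template = TENSOR_NAMES[tensor]
    # Per-block tensors carry a {bid} placeholder; globals do not.
    print(template.format(bid=0) if "{bid}" in template else template)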
@@ -1636,6 +1746,47 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.SSM_D,
         MODEL_TENSOR.SSM_OUT,
     ],
+    MODEL_ARCH.MAMBA2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_NORM,
+        MODEL_TENSOR.SSM_OUT,
+    ],
+    MODEL_ARCH.JAMBA: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_X,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_DT_NORM,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_B_NORM,
+        MODEL_TENSOR.SSM_C_NORM,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_OUT,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.XVERSE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -2005,6 +2156,36 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
     ],
+    MODEL_ARCH.GRANITE_HYBRID: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_NORM,
+        MODEL_TENSOR.SSM_OUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        # MoE
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        # Dense
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.CHAMELEON: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -2101,6 +2282,109 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.ERNIE4_5: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.FALCON_H1: [
+        # Token embedding
+        MODEL_TENSOR.TOKEN_EMBD,
+
+        # Input layernorm
+        MODEL_TENSOR.ATTN_NORM,
+
+        # Attention components
+        MODEL_TENSOR.ATTN_Q,  # Query projection
+        MODEL_TENSOR.ATTN_K,  # Key projection
+        MODEL_TENSOR.ATTN_V,  # Value projection
+        MODEL_TENSOR.ATTN_OUT,  # Output projection
+
+        # SSM components (Mamba2 specific)
+        MODEL_TENSOR.SSM_IN,  # Input projection for SSM
+        MODEL_TENSOR.SSM_CONV1D,  # Convolution layer
+        MODEL_TENSOR.SSM_DT,  # Delta time projection
+        MODEL_TENSOR.SSM_A,  # A parameter (log form)
+        MODEL_TENSOR.SSM_D,  # D parameter
+        MODEL_TENSOR.SSM_NORM,  # Normalization in SSM
+        MODEL_TENSOR.SSM_OUT,  # Output projection
+
+        # Pre-feedforward layernorm
+        MODEL_TENSOR.FFN_PRE_NORM,
+
+        # Feed-forward network components
+        MODEL_TENSOR.FFN_GATE,  # Gate projection (SwiGLU)
+        MODEL_TENSOR.FFN_DOWN,  # Down projection
+        MODEL_TENSOR.FFN_UP,  # Up projection
+
+        # Post-feedforward layernorm
+        MODEL_TENSOR.OUTPUT_NORM,  # Final layer norm
+        MODEL_TENSOR.OUTPUT,  # Output projection (lm_head)
+    ],
+    MODEL_ARCH.HUNYUAN_MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
+    MODEL_ARCH.SMOLLM3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.LFM2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.SHORTCONV_CONV,
+        MODEL_TENSOR.SHORTCONV_INPROJ,
+        MODEL_TENSOR.SHORTCONV_OUTPROJ,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.ATTN_NORM,  # operator_norm
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+    ],
     # TODO
 }
 
@@ -2405,6 +2689,7 @@ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
 KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
 KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
 KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
+KEY_SSM_GROUP_COUNT = Keys.SSM.GROUP_COUNT
 KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS
 
 # tokenization
package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py
@@ -648,6 +648,9 @@ class GGUFWriter:
     def add_convnext_block_count(self, length: int) -> None:
         self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
 
+    def add_shortconv_l_cache(self, length: int) -> None:
+        self.add_uint32(Keys.ShortConv.L_CACHE.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 
@@ -672,6 +675,18 @@
     def add_decoder_start_token_id(self, id: int) -> None:
         self.add_uint32(Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch), id)
 
+    def add_embedding_length_per_layer_input(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.EMBD_LENGTH_PER_LAYER_INP.format(arch=self.arch), value)
+
+    def add_altup_active_idx(self, val: int) -> None:
+        self.add_uint32(Keys.LLM.ALTUP_ACTIVE_IDX.format(arch=self.arch), val)
+
+    def add_altup_num_inputs(self, val: int) -> None:
+        self.add_uint32(Keys.LLM.ALTUP_NUM_INPUTS.format(arch=self.arch), val)
+
+    def add_activation_sparsity_scale(self, values: Sequence[float]) -> None:
+        self.add_array(Keys.LLM.ACTIVATION_SPARSITY_SCALE.format(arch=self.arch), values)
+
     def add_head_count(self, count: int | Sequence[int]) -> None:
         if isinstance(count, int):
             self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
@@ -702,6 +717,12 @@
     def add_clamp_kqv(self, value: float) -> None:
         self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)
 
+    def add_shared_kv_layers(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
+
+    def add_sliding_window_pattern(self, value: Sequence[bool]) -> None:
+        self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value)
+
     def add_logit_scale(self, value: float) -> None:
         self.add_float32(Keys.LLM.LOGIT_SCALE.format(arch=self.arch), value)
 
@@ -843,6 +864,9 @@
     def add_ssm_time_step_rank(self, value: int) -> None:
         self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
 
+    def add_ssm_group_count(self, value: int) -> None:
+        self.add_uint32(Keys.SSM.GROUP_COUNT.format(arch=self.arch), value)
+
     def add_ssm_dt_b_c_rms(self, value: bool) -> None:
         self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
 
@@ -891,6 +915,9 @@
     def add_add_eos_token(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.ADD_EOS, value)
 
+    def add_add_sep_token(self, value: bool) -> None:
+        self.add_bool(Keys.Tokenizer.ADD_SEP, value)
+
     def add_add_space_prefix(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)
 
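Each new GGUFWriter setter is a thin wrapper that formats the matching key from Keys and writes a typed KV field. A sketch of how a converter might call them, assuming the gguf-py package bundled with this release; the output path, arch string, and values are placeholders:

from gguf import GGUFWriter

writer = GGUFWriter("model.gguf", arch="lfm2")
writer.add_shortconv_l_cache(3)    # writes lfm2.shortconv.l_cache
writer.add_ssm_group_count(1)      # writes lfm2.ssm.group_count
writer.add_add_sep_token(False)    # writes tokenizer.ggml.add_sep_token
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()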