@novastera-oss/llamarn 0.2.9 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247)
  1. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  2. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  5. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  15. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  17. package/cpp/build-info.cpp +2 -2
  18. package/cpp/llama.cpp/CMakeLists.txt +0 -1
  19. package/cpp/llama.cpp/README.md +4 -5
  20. package/cpp/llama.cpp/build-xcframework.sh +1 -1
  21. package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
  22. package/cpp/llama.cpp/common/arg.cpp +17 -0
  23. package/cpp/llama.cpp/common/chat.cpp +37 -20
  24. package/cpp/llama.cpp/common/chat.h +2 -0
  25. package/cpp/llama.cpp/common/common.h +4 -0
  26. package/cpp/llama.cpp/convert_hf_to_gguf.py +745 -6
  27. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
  28. package/cpp/llama.cpp/ggml/CMakeLists.txt +7 -2
  29. package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
  30. package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
  31. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +0 -1
  32. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
  33. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
  34. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
  35. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
  36. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
  37. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
  38. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1203 -163
  39. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +33 -9
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
  43. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +17 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
  46. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
  47. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
  48. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
  49. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
  50. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
  52. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
  53. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +8 -6
  54. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
  55. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +185 -79
  56. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
  57. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
  66. package/cpp/llama.cpp/ggml/src/ggml-impl.h +64 -0
  67. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
  68. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +35 -9
  69. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +167 -39
  70. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +254 -57
  71. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +505 -40
  73. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
  74. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
  75. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
  77. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
  83. package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
  84. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
  86. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
  87. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +60 -9
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +711 -292
  92. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
  93. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
  94. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
  95. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
  96. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
  97. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
  98. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  100. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  102. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
  103. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
  105. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
  106. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
  108. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
  110. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
  111. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
  117. package/cpp/llama.cpp/ggml/src/ggml.c +382 -61
  118. package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
  119. package/cpp/llama.cpp/gguf-py/gguf/constants.py +209 -0
  120. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
  121. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +73 -21
  122. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
  123. package/cpp/llama.cpp/include/llama.h +0 -40
  124. package/cpp/llama.cpp/src/llama-arch.cpp +210 -3
  125. package/cpp/llama.cpp/src/llama-arch.h +18 -1
  126. package/cpp/llama.cpp/src/llama-batch.cpp +27 -1
  127. package/cpp/llama.cpp/src/llama-batch.h +8 -1
  128. package/cpp/llama.cpp/src/llama-chat.cpp +15 -0
  129. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  130. package/cpp/llama.cpp/src/llama-graph.cpp +119 -184
  131. package/cpp/llama.cpp/src/llama-graph.h +47 -60
  132. package/cpp/llama.cpp/src/llama-hparams.cpp +7 -1
  133. package/cpp/llama.cpp/src/llama-hparams.h +3 -0
  134. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +28 -18
  135. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +4 -2
  136. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +214 -65
  137. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +62 -24
  138. package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
  139. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +9 -4
  140. package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
  141. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +20 -10
  142. package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
  143. package/cpp/llama.cpp/src/llama-memory.h +3 -0
  144. package/cpp/llama.cpp/src/llama-model.cpp +2530 -685
  145. package/cpp/llama.cpp/src/llama-model.h +18 -0
  146. package/cpp/llama.cpp/src/llama-quant.cpp +1 -0
  147. package/cpp/llama.cpp/src/llama-vocab.cpp +13 -2
  148. package/cpp/llama.cpp/src/llama-vocab.h +41 -0
  149. package/ios/include/chat.h +2 -0
  150. package/ios/include/common.h +4 -0
  151. package/ios/include/llama.h +0 -40
  152. package/ios/libs/llama.xcframework/Info.plist +19 -19
  153. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  154. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5055 -4886
  155. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  156. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
  157. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +0 -40
  158. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  159. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  160. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
  161. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3764
  162. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  163. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  164. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
  165. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  166. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  167. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
  168. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3766
  169. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
  170. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
  171. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +0 -40
  172. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
  173. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
  174. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +0 -40
  175. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  176. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
  177. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
  178. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +0 -40
  179. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  180. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  181. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  182. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4890
  183. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  184. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
  185. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +0 -40
  186. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  187. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  188. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
  189. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3764
  190. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  191. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  192. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
  193. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  194. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  195. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5091 -4922
  196. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  197. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
  198. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +0 -40
  199. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  200. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  201. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4897
  202. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3794
  203. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  204. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  205. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
  206. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  207. package/package.json +1 -1
  208. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
  209. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  210. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  211. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
  212. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
  213. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
  214. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
  215. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
  216. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
  217. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
  218. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
  219. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
  220. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
  221. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
  222. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
  223. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
  224. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
  225. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
  226. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
  227. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
  228. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
  229. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
  230. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
  231. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
  232. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
  233. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
  234. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
  235. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
  236. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
  237. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
  238. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
  239. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
  240. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
  241. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
  242. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
  243. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
  244. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
  245. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
  246. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
  247. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
@@ -631,7 +631,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
                 gguf_free(ctx);
                 return nullptr;
             }
-            ctx->size += GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
+            size_t padded_size = GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
+            if (SIZE_MAX - ctx->size < padded_size) {
+                GGML_LOG_ERROR("%s: tensor '%s' size overflow, cannot accumulate size %zu + %zu\n",
+                               __func__, ti.t.name, ctx->size, padded_size);
+                gguf_free(ctx);
+                return nullptr;
+            }
+            ctx->size += padded_size;
         }
     }
 
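Note: the gguf.cpp hunk above replaces an unchecked size accumulation with an explicit size_t overflow guard before each tensor's padded size is added. A minimal sketch of the same guard logic, restated in Python purely for illustration (Python integers do not overflow, so SIZE_MAX is a stand-in for C's size_t limit, and all names here are hypothetical):

    SIZE_MAX = 2**64 - 1  # size_t limit on a 64-bit target

    def ggml_pad(n: int, alignment: int) -> int:
        # GGML_PAD: round n up to the next multiple of alignment
        return ((n + alignment - 1) // alignment) * alignment

    def accumulate_size(total: int, nbytes: int, alignment: int) -> int:
        padded = ggml_pad(nbytes, alignment)
        # the added check: total + padded must not wrap past SIZE_MAX
        if SIZE_MAX - total < padded:
            raise OverflowError(f"cannot accumulate size {total} + {padded}")
        return total + padded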
@@ -170,6 +170,7 @@ class Keys:
         INNER_SIZE     = "{arch}.ssm.inner_size"
         STATE_SIZE     = "{arch}.ssm.state_size"
         TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
+        GROUP_COUNT    = "{arch}.ssm.group_count"
         DT_B_C_RMS     = "{arch}.ssm.dt_b_c_rms"
 
     class WKV:
@@ -186,6 +187,9 @@ class Keys:
     class Classifier:
         OUTPUT_LABELS = "{arch}.classifier.output_labels"
 
+    class ShortConv:
+        L_CACHE = "{arch}.shortconv.l_cache"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE = "tokenizer.ggml.pre"
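The Keys entries above are per-architecture format templates; the writer expands {arch} when emitting metadata. A small illustrative sketch (the arch strings are examples, not taken from this diff):

    L_CACHE = "{arch}.shortconv.l_cache"    # Keys.ShortConv.L_CACHE above
    GROUP_COUNT = "{arch}.ssm.group_count"  # Keys.SSM.GROUP_COUNT above

    print(L_CACHE.format(arch="lfm2"))        # -> lfm2.shortconv.l_cache
    print(GROUP_COUNT.format(arch="mamba2"))  # -> mamba2.ssm.group_count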
@@ -287,6 +291,7 @@ class MODEL_ARCH(IntEnum):
     LLAMA4 = auto()
     DECI = auto()
     FALCON = auto()
+    FALCON_H1 = auto()
     BAICHUAN = auto()
     GROK = auto()
     GPT2 = auto()
@@ -327,6 +332,8 @@
     RWKV7 = auto()
     ARWKV7 = auto()
     MAMBA = auto()
+    MAMBA2 = auto()
+    JAMBA = auto()
     XVERSE = auto()
     COMMAND_R = auto()
     COHERE2 = auto()
@@ -348,12 +355,17 @@
     EXAONE = auto()
     GRANITE = auto()
     GRANITE_MOE = auto()
+    GRANITE_HYBRID = auto()
     CHAMELEON = auto()
     WAVTOKENIZER_DEC = auto()
     PLM = auto()
     BAILINGMOE = auto()
     DOTS1 = auto()
     ARCEE = auto()
+    ERNIE4_5 = auto()
+    HUNYUAN_MOE = auto()
+    SMOLLM3 = auto()
+    LFM2 = auto()
 
 
 class VISION_PROJECTOR_TYPE(IntEnum):
@@ -426,8 +438,12 @@ class MODEL_TENSOR(IntEnum):
     SSM_CONV1D = auto()
     SSM_X = auto()
     SSM_DT = auto()
+    SSM_DT_NORM = auto()
     SSM_A = auto()
+    SSM_B_NORM = auto()
+    SSM_C_NORM = auto()
     SSM_D = auto()
+    SSM_NORM = auto()
     SSM_OUT = auto()
     TIME_MIX_W0 = auto()
     TIME_MIX_W1 = auto()
@@ -521,6 +537,9 @@
     POSNET_ATTN_K = auto()
     POSNET_ATTN_V = auto()
     POSNET_ATTN_OUT = auto()
+    SHORTCONV_CONV = auto()
+    SHORTCONV_INPROJ = auto()
+    SHORTCONV_OUTPROJ = auto()
     # vision
     V_MMPROJ = auto()
     V_MMPROJ_FC = auto()
@@ -627,6 +646,8 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.RWKV7:            "rwkv7",
     MODEL_ARCH.ARWKV7:           "arwkv7",
     MODEL_ARCH.MAMBA:            "mamba",
+    MODEL_ARCH.MAMBA2:           "mamba2",
+    MODEL_ARCH.JAMBA:            "jamba",
     MODEL_ARCH.XVERSE:           "xverse",
     MODEL_ARCH.COMMAND_R:        "command-r",
     MODEL_ARCH.COHERE2:          "cohere2",
@@ -648,12 +669,18 @@
     MODEL_ARCH.EXAONE:           "exaone",
     MODEL_ARCH.GRANITE:          "granite",
     MODEL_ARCH.GRANITE_MOE:      "granitemoe",
+    MODEL_ARCH.GRANITE_HYBRID:   "granitehybrid",
     MODEL_ARCH.CHAMELEON:        "chameleon",
     MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
     MODEL_ARCH.PLM:              "plm",
     MODEL_ARCH.BAILINGMOE:       "bailingmoe",
     MODEL_ARCH.DOTS1:            "dots1",
     MODEL_ARCH.ARCEE:            "arcee",
+    MODEL_ARCH.ERNIE4_5:         "ernie4_5",
+    MODEL_ARCH.FALCON_H1:        "falcon-h1",
+    MODEL_ARCH.HUNYUAN_MOE:      "hunyuan-moe",
+    MODEL_ARCH.SMOLLM3:          "smollm3",
+    MODEL_ARCH.LFM2:             "lfm2",
 }
 
 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -726,8 +753,12 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.SSM_CONV1D:  "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X:       "blk.{bid}.ssm_x",
     MODEL_TENSOR.SSM_DT:      "blk.{bid}.ssm_dt",
+    MODEL_TENSOR.SSM_DT_NORM: "blk.{bid}.ssm_dt_norm",
     MODEL_TENSOR.SSM_A:       "blk.{bid}.ssm_a",
+    MODEL_TENSOR.SSM_B_NORM:  "blk.{bid}.ssm_b_norm",
+    MODEL_TENSOR.SSM_C_NORM:  "blk.{bid}.ssm_c_norm",
     MODEL_TENSOR.SSM_D:       "blk.{bid}.ssm_d",
+    MODEL_TENSOR.SSM_NORM:    "blk.{bid}.ssm_norm",
     MODEL_TENSOR.SSM_OUT:     "blk.{bid}.ssm_out",
     MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0",
     MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
@@ -821,6 +852,9 @@
     MODEL_TENSOR.POSNET_ATTN_K:      "posnet.{bid}.attn_k",
     MODEL_TENSOR.POSNET_ATTN_V:      "posnet.{bid}.attn_v",
     MODEL_TENSOR.POSNET_ATTN_OUT:    "posnet.{bid}.attn_output",
+    MODEL_TENSOR.SHORTCONV_CONV:     "blk.{bid}.shortconv.conv",
+    MODEL_TENSOR.SHORTCONV_INPROJ:   "blk.{bid}.shortconv.in_proj",
+    MODEL_TENSOR.SHORTCONV_OUTPROJ:  "blk.{bid}.shortconv.out_proj",
     # vision
     MODEL_TENSOR.V_MMPROJ:           "mm.{bid}",
     MODEL_TENSOR.V_MMPROJ_FC:        "mm.model.fc",
@@ -1712,6 +1746,47 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.SSM_D,
         MODEL_TENSOR.SSM_OUT,
     ],
+    MODEL_ARCH.MAMBA2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_NORM,
+        MODEL_TENSOR.SSM_OUT,
+    ],
+    MODEL_ARCH.JAMBA: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_X,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_DT_NORM,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_B_NORM,
+        MODEL_TENSOR.SSM_C_NORM,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_OUT,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.XVERSE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -2081,6 +2156,36 @@
         MODEL_TENSOR.FFN_UP_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
     ],
+    MODEL_ARCH.GRANITE_HYBRID: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_NORM,
+        MODEL_TENSOR.SSM_OUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        # MoE
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        # Dense
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.CHAMELEON: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -2177,6 +2282,109 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.ERNIE4_5: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.FALCON_H1: [
+        # Token embedding
+        MODEL_TENSOR.TOKEN_EMBD,
+
+        # Input layernorm
+        MODEL_TENSOR.ATTN_NORM,
+
+        # Attention components
+        MODEL_TENSOR.ATTN_Q,    # Query projection
+        MODEL_TENSOR.ATTN_K,    # Key projection
+        MODEL_TENSOR.ATTN_V,    # Value projection
+        MODEL_TENSOR.ATTN_OUT,  # Output projection
+
+        # SSM components (Mamba2 specific)
+        MODEL_TENSOR.SSM_IN,      # Input projection for SSM
+        MODEL_TENSOR.SSM_CONV1D,  # Convolution layer
+        MODEL_TENSOR.SSM_DT,      # Delta time projection
+        MODEL_TENSOR.SSM_A,       # A parameter (log form)
+        MODEL_TENSOR.SSM_D,       # D parameter
+        MODEL_TENSOR.SSM_NORM,    # Normalization in SSM
+        MODEL_TENSOR.SSM_OUT,     # Output projection
+
+        # Pre-feedforward layernorm
+        MODEL_TENSOR.FFN_PRE_NORM,
+
+        # Feed-forward network components
+        MODEL_TENSOR.FFN_GATE,  # Gate projection (SwiGLU)
+        MODEL_TENSOR.FFN_DOWN,  # Down projection
+        MODEL_TENSOR.FFN_UP,    # Up projection
+
+        # Post-feedforward layernorm
+        MODEL_TENSOR.OUTPUT_NORM,  # Final layer norm
+        MODEL_TENSOR.OUTPUT,       # Output projection (lm_head)
+    ],
+    MODEL_ARCH.HUNYUAN_MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
+    MODEL_ARCH.SMOLLM3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.LFM2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.SHORTCONV_CONV,
+        MODEL_TENSOR.SHORTCONV_INPROJ,
+        MODEL_TENSOR.SHORTCONV_OUTPROJ,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.ATTN_NORM,  # operator_norm
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+    ],
     # TODO
 }
 
@@ -2481,6 +2689,7 @@ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
 KEY_SSM_INNER_SIZE     = Keys.SSM.INNER_SIZE
 KEY_SSM_STATE_SIZE     = Keys.SSM.STATE_SIZE
 KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
+KEY_SSM_GROUP_COUNT    = Keys.SSM.GROUP_COUNT
 KEY_SSM_DT_B_C_RMS     = Keys.SSM.DT_B_C_RMS
 
 # tokenization
@@ -648,6 +648,9 @@ class GGUFWriter:
     def add_convnext_block_count(self, length: int) -> None:
         self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
 
+    def add_shortconv_l_cache(self, length: int) -> None:
+        self.add_uint32(Keys.ShortConv.L_CACHE.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 
@@ -714,8 +717,8 @@
     def add_clamp_kqv(self, value: float) -> None:
         self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)
 
-    def add_shared_kv_layers(self, value: float) -> None:
-        self.add_float32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
+    def add_shared_kv_layers(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
 
     def add_sliding_window_pattern(self, value: Sequence[bool]) -> None:
         self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value)
@@ -861,6 +864,9 @@
     def add_ssm_time_step_rank(self, value: int) -> None:
         self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
 
+    def add_ssm_group_count(self, value: int) -> None:
+        self.add_uint32(Keys.SSM.GROUP_COUNT.format(arch=self.arch), value)
+
     def add_ssm_dt_b_c_rms(self, value: bool) -> None:
         self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
 
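The three gguf_writer.py hunks above add writer methods for the new keys and change add_shared_kv_layers from float32 to uint32 (a layer count is integral). A hedged usage sketch of the new methods, assuming the vendored gguf-py package; the file name and values are illustrative:

    from gguf import GGUFWriter

    writer = GGUFWriter("model.gguf", arch="lfm2")
    writer.add_block_count(16)
    writer.add_shortconv_l_cache(3)   # writes lfm2.shortconv.l_cache as uint32
    # for a Mamba-2-style architecture one would write instead:
    # writer.add_ssm_group_count(8)   # writes {arch}.ssm.group_count as uint32
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()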
@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte",                           # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings",               # falcon
             "word_embeddings",                           # bloom
-            "model.embed_tokens",                        # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
+            "model.embed_tokens",                        # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 granite-hybrid
             "tok_embeddings",                            # llama-pth
             "embeddings.word_embeddings",                # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -50,6 +50,7 @@
             "model.pre_ln",             # rwkv7
             "model.layers.0.pre_norm",  # rwkv7
             "backbone.norm",            # wavtokenizer
+            "model.embedding_norm",     # lfm2
         ),
 
         # Position embeddings
@@ -118,7 +119,7 @@
             "transformer.h.{bid}.input_layernorm",                  # falcon7b
             "h.{bid}.input_layernorm",                              # bloom
             "transformer.h.{bid}.ln_mlp",                           # falcon40b
-            "model.layers.{bid}.input_layernorm",                   # llama-hf nemotron olmoe phimoe
+            "model.layers.{bid}.input_layernorm",                   # llama-hf nemotron olmoe phimoe granite-hybrid
             "layers.{bid}.attention_norm",                          # llama-pth
             "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
             "model.layers.{bid}.ln1",                               # yi
@@ -136,6 +137,7 @@
             "model.layers.{bid}.ln1",                    # rwkv7
             "model.layers.{bid}.input_layernorm",        # llama4
             "transformer_encoder.{bid}.attention_norm",  # neobert
+            "model.layers.{bid}.operator_norm",          # lfm2
         ),
 
         # Attention norm 2
@@ -220,6 +222,7 @@
             "transformer.h.{bid}.self_attention.dense",    # falcon
             "h.{bid}.self_attention.dense",                # bloom
             "model.layers.{bid}.self_attn.o_proj",         # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.out_proj",       # lfm2
             "model.layers.{bid}.self_attn.linear_attn",    # deci
             "layers.{bid}.attention.wo",                   # llama-pth
             "encoder.layer.{bid}.attention.output.dense",  # bert
@@ -279,6 +282,8 @@
             "transformer.decoder_layer.{bid}.rms_norm_2",     # Grok
             "encoder.layers.{bid}.post_attention_layernorm",  # chatglm
             "transformer.layers.{bid}.ffn_norm",              # openelm
+            "model.layers.{bid}.pre_ff_layernorm",            # jamba granite-hybrid
+            "model.layers.{bid}.pre_moe_layernorm",           # mini-jamba
             "model.layers.{bid}.post_attention_layernorm",    # llama4
             "transformer_encoder.{bid}.ffn_norm",             # neobert
         ),
@@ -286,12 +291,14 @@
         # Post feed-forward norm
         MODEL_TENSOR.FFN_PRE_NORM: (
             "model.layers.{bid}.pre_feedforward_layernorm",  # gemma2
+            "model.layers.{bid}.pre_ff_layernorm.weight",
         ),
 
         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
             "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2
             "model.layers.{bid}.post_mlp_layernorm",          # glm-4-0414
+            "model.layers.{bid}.feed_forward.up_proj",
         ),
 
         MODEL_TENSOR.FFN_GATE_INP: (
@@ -301,8 +308,9 @@
             "transformer.decoder_layer.{bid}.router",            # Grok
             "transformer.blocks.{bid}.ffn.router.layer",         # dbrx
             "model.layers.{bid}.block_sparse_moe.router.layer",  # granitemoe
-            "model.layers.{bid}.feed_forward.router",            # llama4
+            "model.layers.{bid}.feed_forward.router",            # llama4 jamba
             "encoder.layers.{bid}.mlp.router.layer",             # nomic-bert-moe
+            "model.layers.{bid}.mlp.gate.wg",                    # hunyuan
         ),
 
         MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
@@ -344,7 +352,7 @@
             "model.layers.{bid}.residual_mlp.w3",       # arctic
             "encoder.layers.{bid}.mlp.dense_h_to_4h",   # chatglm
             "transformer.h.{bid}.mlp.c_fc_1",           # exaone
-            "model.layers.{bid}.feed_forward.up_proj",  # llama4
+            "model.layers.{bid}.feed_forward.up_proj",  # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w12",        # neobert
         ),
 
@@ -362,6 +370,8 @@
             "model.layers.{bid}.mlp.shared_expert.up_proj",           # qwen2moe
             "model.layers.{bid}.mlp.shared_experts.up_proj",          # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.up_proj",  # llama4
+            "model.layers.{bid}.feed_forward.down_proj",
+            "model.layers.{bid}.mlp.shared_mlp.up_proj",              # hunyuan
         ),
 
         # AWQ-activation gate
@@ -382,7 +392,7 @@
             "transformer.h.{bid}.mlp.linear_1",           # refact
             "model.layers.{bid}.residual_mlp.w1",         # arctic
             "transformer.h.{bid}.mlp.c_fc_0",             # exaone
-            "model.layers.{bid}.feed_forward.gate_proj",  # llama4
+            "model.layers.{bid}.feed_forward.gate_proj",  # llama4 jamba granite-hybrid
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -398,6 +408,7 @@
             "model.layers.{bid}.mlp.shared_expert.gate_proj",           # qwen2moe
             "model.layers.{bid}.mlp.shared_experts.gate_proj",          # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.gate_proj",  # llama4
+            "model.layers.{bid}.mlp.shared_mlp.gate_proj",              # hunyuan
         ),
 
         # Feed-forward down
@@ -427,7 +438,7 @@
             "encoder.layer.{bid}.mlp.down_layer",         # jina-bert-v2
             "encoder.layers.{bid}.mlp.dense_4h_to_h",     # chatglm
             "model.layers.h.{bid}.mlp.c_proj",            # exaone
-            "model.layers.{bid}.feed_forward.down_proj",  # llama4
+            "model.layers.{bid}.feed_forward.down_proj",  # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w3",           # neobert
         ),
 
@@ -447,11 +458,13 @@
             "model.layers.{bid}.mlp.shared_experts.down_proj",          # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.down_proj",  # llama4
             "model.layers.{bid}.shared_mlp.output_linear",              # granitemoe
+            "model.layers.{bid}.mlp.shared_mlp.down_proj",              # hunyuan
         ),
 
         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",         # persimmon
+            "model.layers.{bid}.self_attn.query_layernorm",     # hunyuan
             "model.layers.{bid}.self_attn.q_norm",              # cohere olmoe chameleon olmo2
             "transformer.blocks.{bid}.attn.q_ln",               # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
@@ -461,6 +474,7 @@
         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",         # persimmon
+            "model.layers.{bid}.self_attn.key_layernorm",       # hunyuan
             "model.layers.{bid}.self_attn.k_norm",              # cohere olmoe chameleon olmo2
             "transformer.blocks.{bid}.attn.k_ln",               # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
@@ -477,7 +491,7 @@
             "encoder.layers.{bid}.norm2",                  # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_3",  # Grok
             "encoder.layer.{bid}.mlp.layernorm",           # jina-bert-v2
-            "encoder.layer.{bid}.layer_norm_2"             # jina-v2-code
+            "encoder.layer.{bid}.layer_norm_2",            # jina-v2-code
         ),
 
         MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: (
@@ -545,38 +559,64 @@
         ),
 
         MODEL_TENSOR.SSM_IN: (
-            "model.layers.{bid}.in_proj",
-            "backbone.layers.{bid}.mixer.in_proj",
+            "model.layers.{bid}.in_proj",            # mamba-hf
+            "backbone.layers.{bid}.mixer.in_proj",   # mamba
+            "model.layers.{bid}.mamba.in_proj",      # jamba falcon-h1 granite-hybrid
         ),
 
         MODEL_TENSOR.SSM_CONV1D: (
-            "model.layers.{bid}.conv1d",
-            "backbone.layers.{bid}.mixer.conv1d",
+            "model.layers.{bid}.conv1d",             # mamba-hf
+            "backbone.layers.{bid}.mixer.conv1d",    # mamba
+            "model.layers.{bid}.mamba.conv1d",       # jamba falcon-h1 granite-hybrid
         ),
 
         MODEL_TENSOR.SSM_X: (
-            "model.layers.{bid}.x_proj",
-            "backbone.layers.{bid}.mixer.x_proj",
+            "model.layers.{bid}.x_proj",             # mamba-hf
+            "backbone.layers.{bid}.mixer.x_proj",    # mamba
+            "model.layers.{bid}.mamba.x_proj",       # jamba
         ),
 
         MODEL_TENSOR.SSM_DT: (
-            "model.layers.{bid}.dt_proj",
-            "backbone.layers.{bid}.mixer.dt_proj",
+            "model.layers.{bid}.dt_proj",            # mamba-hf
+            "backbone.layers.{bid}.mixer.dt_proj",   # mamba
+            "model.layers.{bid}.mamba.dt_proj",      # jamba falcon-h1 granite-hybrid
+        ),
+
+        MODEL_TENSOR.SSM_DT_NORM: (
+            "model.layers.{bid}.mamba.dt_layernorm", # jamba
         ),
 
         MODEL_TENSOR.SSM_A: (
-            "model.layers.{bid}.A_log",
-            "backbone.layers.{bid}.mixer.A_log",
+            "model.layers.{bid}.A_log",              # mamba-hf
+            "backbone.layers.{bid}.mixer.A_log",     # mamba
+            "model.layers.{bid}.mamba.A_log",        # jamba falcon-h1 granite-hybrid
+        ),
+
+        MODEL_TENSOR.SSM_B_NORM: (
+            "model.layers.{bid}.mamba.b_layernorm",  # jamba
+            "model.layers.{bid}.mamba.B_layernorm",  # mini-jamba
+        ),
+
+        MODEL_TENSOR.SSM_C_NORM: (
+            "model.layers.{bid}.mamba.c_layernorm",  # jamba
+            "model.layers.{bid}.mamba.C_layernorm",  # mini-jamba
         ),
 
         MODEL_TENSOR.SSM_D: (
-            "model.layers.{bid}.D",
-            "backbone.layers.{bid}.mixer.D",
+            "model.layers.{bid}.D",                  # mamba-hf
+            "backbone.layers.{bid}.mixer.D",         # mamba
+            "model.layers.{bid}.mamba.D",            # jamba falcon-h1 granite-hybrid
+        ),
+
+        MODEL_TENSOR.SSM_NORM: (
+            "model.layers.{bid}.mamba.norm",         # falcon-h1 granite-hybrid
+            "backbone.layers.{bid}.mixer.norm",      # mamba2
        ),
 
         MODEL_TENSOR.SSM_OUT: (
-            "model.layers.{bid}.out_proj",
-            "backbone.layers.{bid}.mixer.out_proj",
+            "model.layers.{bid}.out_proj",           # mamba-hf
+            "backbone.layers.{bid}.mixer.out_proj",  # mamba
+            "model.layers.{bid}.mamba.out_proj",     # jamba falcon-h1 granite-hybrid
         ),
 
         MODEL_TENSOR.TIME_MIX_W0: (
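The SSM hunk above annotates each Hugging Face tensor name with the architectures that use it and adds the model.layers.{bid}.mamba.* variants. A hedged sketch of how these tuples are consumed, assuming gguf-py's TensorNameMap helper (the arch and n_blocks values are illustrative):

    from gguf import MODEL_ARCH
    from gguf.tensor_mapping import get_tensor_name_map

    tmap = get_tensor_name_map(MODEL_ARCH.JAMBA, n_blocks=4)
    # resolves a jamba checkpoint tensor name to its GGUF tensor name:
    print(tmap.get_name("model.layers.2.mamba.in_proj.weight",
                        try_suffixes=(".weight", ".bias")))  # -> blk.2.ssm_in.weight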
@@ -978,6 +1018,18 @@
             "backbone.posnet.{bid}.proj_out",  # wavtokenizer
         ),
 
+        MODEL_TENSOR.SHORTCONV_CONV: (
+            "model.layers.{bid}.conv.conv",
+        ),
+
+        MODEL_TENSOR.SHORTCONV_INPROJ: (
+            "model.layers.{bid}.conv.in_proj",
+        ),
+
+        MODEL_TENSOR.SHORTCONV_OUTPROJ: (
+            "model.layers.{bid}.conv.out_proj",
+        ),
+
         #############################################################################
         ## Vision encoder
 
@@ -245,9 +245,18 @@ class SpecialVocab:
         if not tokenizer_config:
             return True
         chat_template_alt = None
-        chat_template_file = path / 'chat_template.json'
-        if chat_template_file.is_file():
-            with open(chat_template_file, encoding = 'utf-8') as f:
+        chat_template_json = path / 'chat_template.json'
+        chat_template_jinja = path / 'chat_template.jinja'
+        if chat_template_jinja.is_file():
+            with open(chat_template_jinja, encoding = 'utf-8') as f:
+                chat_template_alt = f.read()
+            if additional_templates := list((path / 'additional_chat_templates').glob('*.jinja')):
+                chat_template_alt = [{'name': 'default', 'template': chat_template_alt}]
+                for template_path in additional_templates:
+                    with open(template_path, encoding = 'utf-8') as fp:
+                        chat_template_alt.append({'name': template_path.stem, 'template': fp.read()})
+        elif chat_template_json.is_file():
+            with open(chat_template_json, encoding = 'utf-8') as f:
                 chat_template_alt = json.load(f).get('chat_template')
         chat_template = tokenizer_config.get('chat_template', chat_template_alt)
         if chat_template is None or isinstance(chat_template, (str, list)):
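The vocab.py hunk above makes chat_template.jinja (plus any additional_chat_templates/*.jinja) take precedence over chat_template.json when loading special-vocab metadata. A hedged restatement of the new resolution order, with illustrative names:

    import json
    from pathlib import Path

    def resolve_chat_template(path: Path):
        jinja = path / "chat_template.jinja"
        if jinja.is_file():
            template = jinja.read_text(encoding="utf-8")
            extra = sorted((path / "additional_chat_templates").glob("*.jinja"))
            if not extra:
                return template
            # named templates: the plain .jinja file becomes the default entry
            return [{"name": "default", "template": template}] + [
                {"name": p.stem, "template": p.read_text(encoding="utf-8")} for p in extra
            ]
        json_file = path / "chat_template.json"
        if json_file.is_file():
            return json.loads(json_file.read_text(encoding="utf-8")).get("chat_template")
        return None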