@novastera-oss/llamarn 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319)
  1. package/android/src/main/cpp/include/llama.h +8 -3
  2. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  6. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  7. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  10. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  11. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  13. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  15. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  18. package/cpp/LlamaCppModel.cpp +56 -22
  19. package/cpp/build-info.cpp +2 -2
  20. package/cpp/llama.cpp/CMakeLists.txt +1 -2
  21. package/cpp/llama.cpp/README.md +4 -5
  22. package/cpp/llama.cpp/build-xcframework.sh +1 -1
  23. package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
  24. package/cpp/llama.cpp/common/arg.cpp +24 -0
  25. package/cpp/llama.cpp/common/chat.cpp +37 -20
  26. package/cpp/llama.cpp/common/chat.h +2 -0
  27. package/cpp/llama.cpp/common/common.cpp +3 -0
  28. package/cpp/llama.cpp/common/common.h +5 -0
  29. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
  30. package/cpp/llama.cpp/convert_hf_to_gguf.py +860 -23
  31. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
  32. package/cpp/llama.cpp/ggml/CMakeLists.txt +8 -2
  33. package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
  34. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
  35. package/cpp/llama.cpp/ggml/include/ggml.h +206 -10
  36. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -1
  37. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +37 -3
  43. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +111 -103
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
  60. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1405 -240
  61. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +8 -0
  62. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
  63. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
  64. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +212 -34
  65. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +35 -11
  66. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +187 -54
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +71 -29
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +12 -6
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +269 -110
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
  88. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
  89. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
  90. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
  91. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
  92. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
  93. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
  94. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
  95. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  96. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
  99. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
  100. package/cpp/llama.cpp/ggml/src/ggml-impl.h +125 -183
  101. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +51 -9
  103. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +394 -80
  104. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +616 -239
  105. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +741 -571
  108. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
  112. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
  114. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
  115. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
  116. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
  117. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
  118. package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
  119. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +697 -1098
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
  131. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +104 -62
  132. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  133. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  134. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
  135. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
  136. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
  137. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +39 -38
  138. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
  139. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
  141. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  142. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  144. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
  145. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
  146. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +767 -292
  147. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
  149. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
  150. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
  151. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
  152. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
  153. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
  154. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
  161. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
  162. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
  164. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
  166. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
  167. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
  168. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
  169. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
  170. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
  172. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
  173. package/cpp/llama.cpp/ggml/src/ggml.c +449 -72
  174. package/cpp/llama.cpp/ggml/src/gguf.cpp +13 -2
  175. package/cpp/llama.cpp/gguf-py/gguf/constants.py +285 -0
  176. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +27 -0
  177. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +137 -21
  178. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +109 -7
  179. package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
  180. package/cpp/llama.cpp/include/llama.h +8 -43
  181. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
  182. package/cpp/llama.cpp/src/llama-arch.cpp +265 -3
  183. package/cpp/llama.cpp/src/llama-arch.h +36 -1
  184. package/cpp/llama.cpp/src/llama-batch.cpp +596 -359
  185. package/cpp/llama.cpp/src/llama-batch.h +105 -70
  186. package/cpp/llama.cpp/src/llama-chat.cpp +26 -6
  187. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  188. package/cpp/llama.cpp/src/llama-context.cpp +101 -107
  189. package/cpp/llama.cpp/src/llama-context.h +13 -13
  190. package/cpp/llama.cpp/src/llama-graph.cpp +286 -404
  191. package/cpp/llama.cpp/src/llama-graph.h +78 -79
  192. package/cpp/llama.cpp/src/llama-hparams.cpp +11 -1
  193. package/cpp/llama.cpp/src/llama-hparams.h +11 -0
  194. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +74 -66
  195. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +23 -26
  196. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +312 -157
  197. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +79 -46
  198. package/cpp/llama.cpp/src/llama-kv-cells.h +97 -21
  199. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +73 -69
  200. package/cpp/llama.cpp/src/llama-memory-hybrid.h +19 -22
  201. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +88 -77
  202. package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
  203. package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
  204. package/cpp/llama.cpp/src/llama-memory.h +21 -22
  205. package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
  206. package/cpp/llama.cpp/src/llama-model.cpp +5301 -2922
  207. package/cpp/llama.cpp/src/llama-model.h +40 -0
  208. package/cpp/llama.cpp/src/llama-quant.cpp +88 -5
  209. package/cpp/llama.cpp/src/llama-vocab.cpp +37 -3
  210. package/cpp/llama.cpp/src/llama-vocab.h +42 -0
  211. package/cpp/rn-utils.h +3 -0
  212. package/ios/include/chat.h +2 -0
  213. package/ios/include/common.h +5 -0
  214. package/ios/include/llama.h +8 -43
  215. package/ios/libs/llama.xcframework/Info.plist +19 -19
  216. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  217. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
  218. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  219. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  220. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +206 -10
  221. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -43
  222. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  223. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  224. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  225. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
  226. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  227. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  228. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  229. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  230. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  231. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  232. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  233. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3744
  234. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
  235. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
  236. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +206 -10
  237. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -43
  238. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
  239. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
  240. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +206 -10
  241. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -43
  242. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  243. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
  244. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
  245. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +206 -10
  246. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -43
  247. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  248. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  249. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  250. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
  251. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  252. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  253. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +206 -10
  254. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -43
  255. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  256. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  257. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  258. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
  259. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  260. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  261. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  262. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  263. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  264. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  265. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5095 -4900
  266. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  267. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  268. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +206 -10
  269. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -43
  270. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  271. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  272. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4871
  273. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3773
  274. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  275. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  276. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  277. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  278. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  279. package/package.json +1 -1
  280. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
  281. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  282. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  283. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
  284. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
  285. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
  286. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
  287. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
  288. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
  289. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
  290. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
  291. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
  292. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
  293. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
  294. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
  295. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
  296. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
  297. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
  298. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
  299. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
  300. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
  301. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
  302. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
  303. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
  304. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
  305. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
  306. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
  307. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
  308. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
  309. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
  310. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
  311. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
  312. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
  313. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
  314. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
  315. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
  316. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
  317. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
  318. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
  319. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
@@ -13,7 +13,7 @@ class TensorNameMap:
13
13
  "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
14
14
  "transformer.word_embeddings", # falcon
15
15
  "word_embeddings", # bloom
16
- "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
16
+ "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 granite-hybrid
17
17
  "tok_embeddings", # llama-pth
18
18
  "embeddings.word_embeddings", # bert nomic-bert
19
19
  "language_model.embedding.word_embeddings", # persimmon
@@ -50,6 +50,7 @@ class TensorNameMap:
50
50
  "model.pre_ln", # rwkv7
51
51
  "model.layers.0.pre_norm", # rwkv7
52
52
  "backbone.norm", # wavtokenizer
53
+ "model.embedding_norm", # lfm2
53
54
  ),
54
55
 
55
56
  # Position embeddings
@@ -118,7 +119,7 @@ class TensorNameMap:
118
119
  "transformer.h.{bid}.input_layernorm", # falcon7b
119
120
  "h.{bid}.input_layernorm", # bloom
120
121
  "transformer.h.{bid}.ln_mlp", # falcon40b
121
- "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe
122
+ "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe granite-hybrid
122
123
  "layers.{bid}.attention_norm", # llama-pth
123
124
  "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
124
125
  "model.layers.{bid}.ln1", # yi
@@ -136,6 +137,7 @@ class TensorNameMap:
136
137
  "model.layers.{bid}.ln1", # rwkv7
137
138
  "model.layers.{bid}.input_layernorm", # llama4
138
139
  "transformer_encoder.{bid}.attention_norm", # neobert
140
+ "model.layers.{bid}.operator_norm", # lfm2
139
141
  ),
140
142
 
141
143
  # Attention norm 2
@@ -220,6 +222,7 @@ class TensorNameMap:
220
222
  "transformer.h.{bid}.self_attention.dense", # falcon
221
223
  "h.{bid}.self_attention.dense", # bloom
222
224
  "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
225
+ "model.layers.{bid}.self_attn.out_proj", # lfm2
223
226
  "model.layers.{bid}.self_attn.linear_attn", # deci
224
227
  "layers.{bid}.attention.wo", # llama-pth
225
228
  "encoder.layer.{bid}.attention.output.dense", # bert
@@ -279,6 +282,8 @@ class TensorNameMap:
279
282
  "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
280
283
  "encoder.layers.{bid}.post_attention_layernorm", # chatglm
281
284
  "transformer.layers.{bid}.ffn_norm", # openelm
285
+ "model.layers.{bid}.pre_ff_layernorm", # jamba granite-hybrid
286
+ "model.layers.{bid}.pre_moe_layernorm", # mini-jamba
282
287
  "model.layers.{bid}.post_attention_layernorm", # llama4
283
288
  "transformer_encoder.{bid}.ffn_norm", # neobert
284
289
  ),
@@ -286,12 +291,14 @@ class TensorNameMap:
286
291
  # Post feed-forward norm
287
292
  MODEL_TENSOR.FFN_PRE_NORM: (
288
293
  "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
294
+ "model.layers.{bid}.pre_ff_layernorm.weight",
289
295
  ),
290
296
 
291
297
  # Post feed-forward norm
292
298
  MODEL_TENSOR.FFN_POST_NORM: (
293
299
  "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
294
300
  "model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
301
+ "model.layers.{bid}.feed_forward.up_proj",
295
302
  ),
296
303
 
297
304
  MODEL_TENSOR.FFN_GATE_INP: (
@@ -301,8 +308,9 @@ class TensorNameMap:
301
308
  "transformer.decoder_layer.{bid}.router", # Grok
302
309
  "transformer.blocks.{bid}.ffn.router.layer", # dbrx
303
310
  "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
304
- "model.layers.{bid}.feed_forward.router", # llama4
311
+ "model.layers.{bid}.feed_forward.router", # llama4 jamba
305
312
  "encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe
313
+ "model.layers.{bid}.mlp.gate.wg", # hunyuan
306
314
  ),
307
315
 
308
316
  MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
@@ -344,7 +352,7 @@ class TensorNameMap:
344
352
  "model.layers.{bid}.residual_mlp.w3", # arctic
345
353
  "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
346
354
  "transformer.h.{bid}.mlp.c_fc_1", # exaone
347
- "model.layers.{bid}.feed_forward.up_proj", # llama4
355
+ "model.layers.{bid}.feed_forward.up_proj", # llama4 jamba granite-hybrid
348
356
  "transformer_encoder.{bid}.ffn.w12", # neobert
349
357
  ),
350
358
 
@@ -362,6 +370,8 @@ class TensorNameMap:
362
370
  "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
363
371
  "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
364
372
  "model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
373
+ "model.layers.{bid}.feed_forward.down_proj",
374
+ "model.layers.{bid}.mlp.shared_mlp.up_proj", # hunyuan
365
375
  ),
366
376
 
367
377
  # AWQ-activation gate
@@ -382,7 +392,7 @@ class TensorNameMap:
382
392
  "transformer.h.{bid}.mlp.linear_1", # refact
383
393
  "model.layers.{bid}.residual_mlp.w1", # arctic
384
394
  "transformer.h.{bid}.mlp.c_fc_0", # exaone
385
- "model.layers.{bid}.feed_forward.gate_proj", # llama4
395
+ "model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba granite-hybrid
386
396
  ),
387
397
 
388
398
  MODEL_TENSOR.FFN_GATE_EXP: (
@@ -398,6 +408,7 @@ class TensorNameMap:
398
408
  "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
399
409
  "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
400
410
  "model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
411
+ "model.layers.{bid}.mlp.shared_mlp.gate_proj", # hunyuan
401
412
  ),
402
413
 
403
414
  # Feed-forward down
@@ -427,7 +438,7 @@ class TensorNameMap:
427
438
  "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
428
439
  "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
429
440
  "model.layers.h.{bid}.mlp.c_proj", # exaone
430
- "model.layers.{bid}.feed_forward.down_proj", # llama4
441
+ "model.layers.{bid}.feed_forward.down_proj", # llama4 jamba granite-hybrid
431
442
  "transformer_encoder.{bid}.ffn.w3", # neobert
432
443
  ),
433
444
 
@@ -447,11 +458,13 @@ class TensorNameMap:
447
458
  "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
448
459
  "model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
449
460
  "model.layers.{bid}.shared_mlp.output_linear", # granitemoe
461
+ "model.layers.{bid}.mlp.shared_mlp.down_proj", # hunyuan
450
462
  ),
451
463
 
452
464
  MODEL_TENSOR.ATTN_Q_NORM: (
453
465
  "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
454
466
  "model.layers.{bid}.self_attn.q_layernorm", # persimmon
467
+ "model.layers.{bid}.self_attn.query_layernorm", # hunyuan
455
468
  "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
456
469
  "transformer.blocks.{bid}.attn.q_ln", # sea-lion
457
470
  "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
@@ -461,6 +474,7 @@ class TensorNameMap:
461
474
  MODEL_TENSOR.ATTN_K_NORM: (
462
475
  "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
463
476
  "model.layers.{bid}.self_attn.k_layernorm", # persimmon
477
+ "model.layers.{bid}.self_attn.key_layernorm", # hunyuan
464
478
  "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
465
479
  "transformer.blocks.{bid}.attn.k_ln", # sea-lion
466
480
  "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
@@ -477,42 +491,132 @@ class TensorNameMap:
477
491
  "encoder.layers.{bid}.norm2", # nomic-bert
478
492
  "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
479
493
  "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
480
- "encoder.layer.{bid}.layer_norm_2" # jina-v2-code
494
+ "encoder.layer.{bid}.layer_norm_2", # jina-v2-code
495
+ ),
496
+
497
+ MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: (
498
+ "model.embed_tokens_per_layer", # gemma3n
499
+ ),
500
+
501
+ MODEL_TENSOR.PER_LAYER_MODEL_PROJ: (
502
+ "model.per_layer_model_projection", # gemma3n
503
+ ),
504
+
505
+ MODEL_TENSOR.PER_LAYER_PROJ_NORM: (
506
+ "model.per_layer_projection_norm", # gemma3n
507
+ ),
508
+
509
+ MODEL_TENSOR.ALTUP_PROJ: (
510
+ "model.altup_projections", # gemma3n
511
+ ),
512
+
513
+ MODEL_TENSOR.ALTUP_UNEMBD_PROJ: (
514
+ "model.altup_unembed_projections", # gemma3n
515
+ ),
516
+
517
+ MODEL_TENSOR.PER_LAYER_INP_GATE: (
518
+ "model.layers.{bid}.per_layer_input_gate", # gemma3n
519
+ ),
520
+
521
+ MODEL_TENSOR.PER_LAYER_PROJ: (
522
+ "model.layers.{bid}.per_layer_projection", # gemma3n
523
+ ),
524
+
525
+ MODEL_TENSOR.PER_LAYER_POST_NORM: (
526
+ "model.layers.{bid}.post_per_layer_input_norm", # gemma3n
527
+ ),
528
+
529
+ MODEL_TENSOR.ALTUP_CORRECT_COEF: (
530
+ "model.layers.{bid}.altup.correction_coefs", # gemma3n
531
+ ),
532
+
533
+ MODEL_TENSOR.ALTUP_CORRECT_SCALE: (
534
+ "model.layers.{bid}.altup.correct_output_scale", # gemma3n
535
+ ),
536
+
537
+ MODEL_TENSOR.ALTUP_PREDICT_COEF: (
538
+ "model.layers.{bid}.altup.prediction_coefs", # gemma3n
539
+ ),
540
+
541
+ MODEL_TENSOR.ALTUP_ROUTER: (
542
+ "model.layers.{bid}.altup.modality_router", # gemma3n
543
+ ),
544
+
545
+ MODEL_TENSOR.ALTUP_ROUTER_NORM: (
546
+ "model.layers.{bid}.altup.router_norm", # gemma3n
547
+ ),
548
+
549
+ MODEL_TENSOR.LAUREL_L: (
550
+ "model.layers.{bid}.laurel.linear_left", # gemma3n
551
+ ),
552
+
553
+ MODEL_TENSOR.LAUREL_R: (
554
+ "model.layers.{bid}.laurel.linear_right", # gemma3n
555
+ ),
556
+
557
+ MODEL_TENSOR.LAUREL_POST_NORM: (
558
+ "model.layers.{bid}.laurel.post_laurel_norm", # gemma3n
481
559
  ),
482
560
 
483
561
  MODEL_TENSOR.SSM_IN: (
484
- "model.layers.{bid}.in_proj",
485
- "backbone.layers.{bid}.mixer.in_proj",
562
+ "model.layers.{bid}.in_proj", # mamba-hf
563
+ "backbone.layers.{bid}.mixer.in_proj", # mamba
564
+ "model.layers.{bid}.mamba.in_proj", # jamba falcon-h1 granite-hybrid
486
565
  ),
487
566
 
488
567
  MODEL_TENSOR.SSM_CONV1D: (
489
- "model.layers.{bid}.conv1d",
490
- "backbone.layers.{bid}.mixer.conv1d",
568
+ "model.layers.{bid}.conv1d", # mamba-hf
569
+ "backbone.layers.{bid}.mixer.conv1d", # mamba
570
+ "model.layers.{bid}.mamba.conv1d", # jamba falcon-h1 granite-hybrid
491
571
  ),
492
572
 
493
573
  MODEL_TENSOR.SSM_X: (
494
- "model.layers.{bid}.x_proj",
495
- "backbone.layers.{bid}.mixer.x_proj",
574
+ "model.layers.{bid}.x_proj", # mamba-hf
575
+ "backbone.layers.{bid}.mixer.x_proj", # mamba
576
+ "model.layers.{bid}.mamba.x_proj", # jamba
496
577
  ),
497
578
 
498
579
  MODEL_TENSOR.SSM_DT: (
499
- "model.layers.{bid}.dt_proj",
500
- "backbone.layers.{bid}.mixer.dt_proj",
580
+ "model.layers.{bid}.dt_proj", # mamba-hf
581
+ "backbone.layers.{bid}.mixer.dt_proj", # mamba
582
+ "model.layers.{bid}.mamba.dt_proj", # jamba falcon-h1 granite-hybrid
583
+ ),
584
+
585
+ MODEL_TENSOR.SSM_DT_NORM: (
586
+ "model.layers.{bid}.mamba.dt_layernorm", # jamba
501
587
  ),
502
588
 
503
589
  MODEL_TENSOR.SSM_A: (
504
- "model.layers.{bid}.A_log",
505
- "backbone.layers.{bid}.mixer.A_log",
590
+ "model.layers.{bid}.A_log", # mamba-hf
591
+ "backbone.layers.{bid}.mixer.A_log", # mamba
592
+ "model.layers.{bid}.mamba.A_log", # jamba falcon-h1 granite-hybrid
593
+ ),
594
+
595
+ MODEL_TENSOR.SSM_B_NORM: (
596
+ "model.layers.{bid}.mamba.b_layernorm", # jamba
597
+ "model.layers.{bid}.mamba.B_layernorm", # mini-jamba
598
+ ),
599
+
600
+ MODEL_TENSOR.SSM_C_NORM: (
601
+ "model.layers.{bid}.mamba.c_layernorm", # jamba
602
+ "model.layers.{bid}.mamba.C_layernorm", # mini-jamba
506
603
  ),
507
604
 
508
605
  MODEL_TENSOR.SSM_D: (
509
- "model.layers.{bid}.D",
510
- "backbone.layers.{bid}.mixer.D",
606
+ "model.layers.{bid}.D", # mamba-hf
607
+ "backbone.layers.{bid}.mixer.D", # mamba
608
+ "model.layers.{bid}.mamba.D", # jamba falcon-h1 granite-hybrid
609
+ ),
610
+
611
+ MODEL_TENSOR.SSM_NORM: (
612
+ "model.layers.{bid}.mamba.norm", # falcon-h1 granite-hybrid
613
+ "backbone.layers.{bid}.mixer.norm", # mamba2
511
614
  ),
512
615
 
513
616
  MODEL_TENSOR.SSM_OUT: (
514
- "model.layers.{bid}.out_proj",
515
- "backbone.layers.{bid}.mixer.out_proj",
617
+ "model.layers.{bid}.out_proj", # mamba-hf
618
+ "backbone.layers.{bid}.mixer.out_proj", # mamba
619
+ "model.layers.{bid}.mamba.out_proj", # jamba falcon-h1 granite-hybrid
516
620
  ),
517
621
 
518
622
  MODEL_TENSOR.TIME_MIX_W0: (
@@ -914,6 +1018,18 @@ class TensorNameMap:
914
1018
  "backbone.posnet.{bid}.proj_out", # wavtokenizer
915
1019
  ),
916
1020
 
1021
+ MODEL_TENSOR.SHORTCONV_CONV: (
1022
+ "model.layers.{bid}.conv.conv",
1023
+ ),
1024
+
1025
+ MODEL_TENSOR.SHORTCONV_INPROJ: (
1026
+ "model.layers.{bid}.conv.in_proj",
1027
+ ),
1028
+
1029
+ MODEL_TENSOR.SHORTCONV_OUTPROJ: (
1030
+ "model.layers.{bid}.conv.out_proj",
1031
+ ),
1032
+
917
1033
  #############################################################################
918
1034
  ## Vision encoder
919
1035
 
@@ -7,7 +7,10 @@ import os
7
7
  from pathlib import Path
8
8
  from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable
9
9
 
10
- from sentencepiece import SentencePieceProcessor
10
+ try:
11
+ from sentencepiece import SentencePieceProcessor
12
+ except ImportError:
13
+ SentencePieceProcessor = None
11
14
 
12
15
  import gguf
13
16
 
@@ -116,6 +119,7 @@ class SpecialVocab:
116
119
  logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
117
120
 
118
121
  def _try_load_from_tokenizer_json(self, path: Path) -> bool:
122
+ tokenizer = None
119
123
  tokenizer_file = path / 'tokenizer.json'
120
124
  if tokenizer_file.is_file():
121
125
  with open(tokenizer_file, encoding = 'utf-8') as f:
@@ -149,15 +153,110 @@ class SpecialVocab:
149
153
  added_tokens = tokenizer.get('added_tokens', {})
150
154
  else:
151
155
  added_tokens = {}
156
+ tokenizer_config = None
152
157
  tokenizer_config_file = path / 'tokenizer_config.json'
153
- if not tokenizer_config_file.is_file():
158
+ if tokenizer_config_file.is_file():
159
+ with open(tokenizer_config_file, encoding = 'utf-8') as f:
160
+ tokenizer_config = json.load(f)
161
+ if tokenizer:
162
+ special_bos = (tokenizer_config or {}).get('bos_token')
163
+ special_cls = (tokenizer_config or {}).get('cls_token')
164
+ special_eos = (tokenizer_config or {}).get('eos_token')
165
+ special_sep = (tokenizer_config or {}).get('sep_token')
166
+ if not special_bos and special_cls and tokenizer_config:
167
+ tokenizer_config['bos_token'] = special_bos = special_cls
168
+ if not special_eos and special_sep and tokenizer_config:
169
+ tokenizer_config['eos_token'] = special_eos = special_sep
170
+ if post_processor := tokenizer.get('post_processor'):
171
+ for processor in post_processor.get('processors', [post_processor]):
172
+ if processor.get('type') == 'RobertaProcessing':
173
+ self.add_special_token['bos'] = True
174
+ self.add_special_token['eos'] = True
175
+ self.add_special_token['sep'] = True
176
+ if not special_cls and tokenizer_config:
177
+ special_cls = processor.get('cls', [special_bos])[0]
178
+ tokenizer_config['cls_token'] = special_cls
179
+ if not special_sep and tokenizer_config:
180
+ special_sep = processor.get('sep', [special_eos])[0]
181
+ tokenizer_config['sep_token'] = special_sep
182
+ continue
183
+ # Crude parsing of TemplateProcessing to determine if BOS/SEP/EOS should be added
184
+ # Only works with simple templates, **will** get it wrong on unusual sequences
185
+ if processor.get('type') == 'TemplateProcessing':
186
+ tmpl_single = processor.get('single', [])
187
+ tmpl_pair = processor.get('pair', [])
188
+ special_first = None
189
+ special_last = None
190
+ if len(tmpl_single) > 1:
191
+ if special_first := tmpl_single[0].get('SpecialToken', {}).get('id'):
192
+ if not tokenizer_config:
193
+ special_bos = special_first
194
+ self.add_special_token['bos'] = True if special_first in (special_bos, special_cls) else False
195
+ if special_first not in (special_bos, special_cls):
196
+ logger.warning(f'Unknown leading special token {special_first!r} in TemplateProcessing<single>')
197
+ if special_last := tmpl_single[-1].get('SpecialToken', {}).get('id'):
198
+ if not tokenizer_config:
199
+ special_eos = special_last
200
+ elif special_last != special_eos:
201
+ if 'eot' not in self.special_token_types:
202
+ self.special_token_types = tuple(self.special_token_types) + ('eot', )
203
+ tokenizer_config['eot_token'] = special_eos
204
+ elif 'eom' not in self.special_token_types:
205
+ self.special_token_types = tuple(self.special_token_types) + ('eom', )
206
+ tokenizer_config['eom_token'] = special_eos
207
+ else:
208
+ logger.warning(f'Overriding EOS token {special_eos!r} with {special_last!r} without EOT/EOM fallback!')
209
+ tokenizer_config['eos_token'] = special_eos = special_last
210
+ self.add_special_token['eos'] = True if special_last == special_eos else False
211
+ if special_last != special_eos:
212
+ logger.warning(f'Unknown trailing special token {special_last!r} in TemplateProcessing<single>')
213
+ if tmpl_pair:
214
+ seq_start = 1 if special_first and tmpl_pair[0].get('SpecialToken', {}).get('id') == special_first else 0
215
+ seq_stop = -1 if special_last and tmpl_pair[-1].get('SpecialToken', {}).get('id') == special_last else None
216
+ if (special_first and seq_start == 0) or (special_last and seq_stop is None):
217
+ logger.warning('TemplateProcessing<single> leading/trailing special tokens do not match TemplateProcessing<pair>')
218
+ if tmpl_pair := tmpl_pair[slice(seq_start, seq_stop)]:
219
+ tmpl_a = tmpl_pair[0].get('Sequence', {}).get('id')
220
+ tmpl_b = tmpl_pair[-1].get('Sequence', {}).get('id')
221
+ if tmpl_a != 'A' or tmpl_b != 'B':
222
+ logger.warning(f'Unknown sequence {tmpl_a}...{tmpl_b} in TemplateProcessing<pair>')
223
+ # A [sep] [eos] B
224
+ if tmpl_a == 'A' and tmpl_b == 'B' and (tmpl_pair := tmpl_pair[1:-1]):
225
+ add_sep = False
226
+ if special_entry := tmpl_pair[0].get('SpecialToken', {}).get('id'):
227
+ if special_entry in (special_sep, special_eos) and not special_last:
228
+ add_sep = True
229
+ if special_entry not in (special_sep, special_eos):
230
+ logger.warning(f'Unknown separator token {special_entry!r} in TemplateProcessing<pair>')
231
+ else:
232
+ logger.warning(f'Unknown middle sequence {tmpl_pair[0]!r} in TemplateProcessing<pair>')
233
+ if len(tmpl_pair) == 2:
234
+ if special_entry := tmpl_pair[1].get('SpecialToken', {}).get('id'):
235
+ if special_entry in (special_sep, special_eos):
236
+ add_sep = True
237
+ if special_entry not in (special_sep, special_eos):
238
+ logger.warning(f'Unknown second separator token {special_entry!r} in TemplateProcessing<pair>')
239
+ else:
240
+ logger.warning(f'Unknown second middle sequence {tmpl_pair[1]!r} in TemplateProcessing<pair>')
241
+ self.add_special_token['sep'] = add_sep
242
+ if add_sep and not special_sep and tokenizer_config:
243
+ tokenizer_config['sep_token'] = special_eos
244
+ continue
245
+ if not tokenizer_config:
154
246
  return True
155
- with open(tokenizer_config_file, encoding = 'utf-8') as f:
156
- tokenizer_config = json.load(f)
157
247
  chat_template_alt = None
158
- chat_template_file = path / 'chat_template.json'
159
- if chat_template_file.is_file():
160
- with open(chat_template_file, encoding = 'utf-8') as f:
248
+ chat_template_json = path / 'chat_template.json'
249
+ chat_template_jinja = path / 'chat_template.jinja'
250
+ if chat_template_jinja.is_file():
251
+ with open(chat_template_jinja, encoding = 'utf-8') as f:
252
+ chat_template_alt = f.read()
253
+ if additional_templates := list((path / 'additional_chat_templates').glob('*.jinja')):
254
+ chat_template_alt = [{'name': 'default', 'template': chat_template_alt}]
255
+ for template_path in additional_templates:
256
+ with open(template_path, encoding = 'utf-8') as fp:
257
+ chat_template_alt.append({'name': template_path.stem, 'template': fp.read()})
258
+ elif chat_template_json.is_file():
259
+ with open(chat_template_json, encoding = 'utf-8') as f:
161
260
  chat_template_alt = json.load(f).get('chat_template')
162
261
  chat_template = tokenizer_config.get('chat_template', chat_template_alt)
163
262
  if chat_template is None or isinstance(chat_template, (str, list)):
@@ -302,6 +401,9 @@ class SentencePieceVocab(Vocab):
302
401
  name = "spm"
303
402
 
304
403
  def __init__(self, base_path: Path):
404
+ if SentencePieceProcessor is None:
405
+ raise RuntimeError("sentencepiece is not installed")
406
+
305
407
  added_tokens: dict[str, int] = {}
306
408
  if (fname_tokenizer := base_path / 'tokenizer.model').exists():
307
409
  # normal location
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "gguf"
3
- version = "0.17.0"
3
+ version = "0.17.1"
4
4
  description = "Read and write ML models in GGUF for GGML"
5
5
  authors = ["GGML <ggml@ggml.ai>"]
6
6
  packages = [
@@ -22,7 +22,7 @@ python = ">=3.8"
22
22
  numpy = ">=1.17"
23
23
  tqdm = ">=4.27"
24
24
  pyyaml = ">=5.1"
25
- sentencepiece = ">=0.1.98,<=0.2.0"
25
+ sentencepiece = { version = ">=0.1.98,<=0.2.0", optional = true }
26
26
  PySide6 = { version = "^6.9", python = ">=3.9,<3.14", optional = true }
27
27
 
28
28
  [tool.poetry.dev-dependencies]
@@ -79,46 +79,6 @@ extern "C" {
79
79
  LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization
80
80
  };
81
81
 
82
- // pre-tokenization types
83
- enum llama_vocab_pre_type {
84
- LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
85
- LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
86
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
87
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
88
- LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
89
- LLAMA_VOCAB_PRE_TYPE_MPT = 5,
90
- LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
91
- LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
92
- LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
93
- LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
94
- LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10,
95
- LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
96
- LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
97
- LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
98
- LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
99
- LLAMA_VOCAB_PRE_TYPE_PORO = 15,
100
- LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16,
101
- LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
102
- LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
103
- LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
104
- LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
105
- LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
106
- LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
107
- LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
108
- LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
109
- LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
110
- LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
111
- LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
112
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
113
- LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
114
- LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
115
- LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
116
- LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
117
- LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
118
- LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
119
- LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
120
- };
121
-
122
82
  enum llama_rope_type {
123
83
  LLAMA_ROPE_TYPE_NONE = -1,
124
84
  LLAMA_ROPE_TYPE_NORM = 0,
@@ -390,6 +350,7 @@ extern "C" {
390
350
  void * imatrix; // pointer to importance matrix data
391
351
  void * kv_overrides; // pointer to vector containing overrides
392
352
  void * tensor_types; // pointer to vector containing tensor types
353
+ void * prune_layers; // pointer to vector containing layer indices to prune
393
354
  } llama_model_quantize_params;
394
355
 
395
356
  typedef struct llama_logit_bias {
@@ -943,12 +904,14 @@ extern "C" {
943
904
  // Requires the context to have a memory.
944
905
  // For encode-decoder contexts, processes the batch using the decoder.
945
906
  // Positive return values does not mean a fatal error, but rather a warning.
946
- // Upon non-zero return values, the memory state is restored to the state before this call
907
+ // Upon fatal-error or abort, the ubatches that managed to be been processed will remain in the memory state of the context
908
+ // To handle this correctly, query the memory state using llama_memory_seq_pos_min() and llama_memory_seq_pos_max()
909
+ // Upon other return values, the memory state is restored to the state before this call
947
910
  // 0 - success
948
911
  // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
949
- // 2 - aborted
912
+ // 2 - aborted (processed ubatches will remain in the context's memory)
950
913
  // -1 - invalid input batch
951
- // < -1 - error
914
+ // < -1 - fatal error (processed ubatches will remain in the context's memory)
952
915
  LLAMA_API int32_t llama_decode(
953
916
  struct llama_context * ctx,
954
917
  struct llama_batch batch);
@@ -1044,6 +1007,7 @@ extern "C" {
1044
1007
 
1045
1008
  LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
1046
1009
  LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
1010
+ LLAMA_API bool llama_vocab_get_add_sep(const struct llama_vocab * vocab);
1047
1011
 
1048
1012
  LLAMA_API llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab);
1049
1013
  LLAMA_API llama_token llama_vocab_fim_suf(const struct llama_vocab * vocab);
@@ -1087,6 +1051,7 @@ extern "C" {
1087
1051
  /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
1088
1052
  /// @return Returns the number of tokens on success, no more than n_tokens_max
1089
1053
  /// @return Returns a negative number on failure - the number of tokens that would have been returned
1054
+ /// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
1090
1055
  /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
1091
1056
  /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
1092
1057
  /// as plaintext. Does not insert a leading space.