@novastera-oss/llamarn 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/PureCppImpl.cpp +9 -27
  14. package/cpp/SystemUtils.h +2 -2
  15. package/cpp/build-info.cpp +2 -2
  16. package/cpp/llama.cpp/README.md +11 -3
  17. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  18. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  19. package/cpp/llama.cpp/common/arg.cpp +153 -113
  20. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  21. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  22. package/cpp/llama.cpp/common/chat.cpp +847 -699
  23. package/cpp/llama.cpp/common/chat.h +73 -6
  24. package/cpp/llama.cpp/common/common.cpp +50 -82
  25. package/cpp/llama.cpp/common/common.h +21 -17
  26. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  27. package/cpp/llama.cpp/common/json-partial.h +37 -0
  28. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  29. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  30. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  31. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  32. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  33. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  34. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  35. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  36. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  37. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  38. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  39. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  40. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  73. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  74. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  75. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  76. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  77. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  110. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  111. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  120. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  121. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  122. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  123. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  124. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  125. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  126. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  127. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  128. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  129. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  130. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  131. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  132. package/cpp/llama.cpp/include/llama.h +62 -125
  133. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  134. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  135. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  150. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  152. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  154. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  159. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  160. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  161. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  162. package/cpp/llama.cpp/models/templates/README.md +2 -0
  163. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  164. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  165. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  166. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  167. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  168. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  169. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  170. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  171. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  172. package/cpp/llama.cpp/src/llama-context.h +30 -0
  173. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  174. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  175. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  176. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  177. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  178. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  179. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  180. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  181. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  182. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  183. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  184. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  185. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  186. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  187. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  188. package/cpp/llama.cpp/src/llama-model.h +6 -1
  189. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  190. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  191. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  192. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  193. package/cpp/llama.cpp/src/llama.cpp +14 -0
  194. package/cpp/rn-completion.cpp +60 -5
  195. package/ios/include/chat.h +73 -6
  196. package/ios/include/common/minja/chat-template.hpp +9 -5
  197. package/ios/include/common/minja/minja.hpp +69 -36
  198. package/ios/include/common.h +21 -17
  199. package/ios/include/llama.h +62 -125
  200. package/ios/libs/llama.xcframework/Info.plist +19 -19
  201. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  202. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  203. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  204. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  205. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  206. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  207. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  208. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  212. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  213. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  227. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  228. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  229. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  231. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  232. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  233. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  234. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  235. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  236. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  240. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  241. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  242. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  243. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  244. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  245. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  246. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  247. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  248. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  249. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  253. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  254. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  255. package/package.json +1 -1
  256. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  257. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  258. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  259. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  260. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  261. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  262. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  263. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  267. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  268. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
@@ -45,7 +45,7 @@ class SentencePieceTokenTypes(IntEnum):
 
 class ModelType(IntEnum):
     TEXT = 1
-    VISION = 2
+    MMPROJ = 2
 
 
 AnyModel = TypeVar("AnyModel", bound="type[ModelBase]")
@@ -54,7 +54,7 @@ AnyModel = TypeVar("AnyModel", bound="type[ModelBase]")
 class ModelBase:
     _model_classes: dict[ModelType, dict[str, type[ModelBase]]] = {
         ModelType.TEXT: {},
-        ModelType.VISION: {},
+        ModelType.MMPROJ: {},
     }
 
     dir_model: Path
@@ -88,7 +88,7 @@ class ModelBase:
                  small_first_shard: bool = False, hparams: dict[str, Any] | None = None, remote_hf_model_id: str | None = None):
         if type(self) is ModelBase or \
                 type(self) is TextModel or \
-                type(self) is VisionModel:
+                type(self) is MmprojModel:
            raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
 
         self.dir_model = dir_model
@@ -308,6 +308,8 @@ class ModelBase:
                     gguf.MODEL_TENSOR.TIME_MIX_LERP_FUSED,
                     gguf.MODEL_TENSOR.POSNET_NORM1,
                     gguf.MODEL_TENSOR.POSNET_NORM2,
+                    gguf.MODEL_TENSOR.V_ENC_EMBD_POS,
+                    gguf.MODEL_TENSOR.A_ENC_EMBD_POS,
                 )
             )
             or not new_name.endswith(".weight")
@@ -421,23 +423,26 @@ class ModelBase:
         try:
             # for security reason, we don't allow loading remote code by default
             # if a model need remote code, we will fallback to config.json
-            return AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
+            config = AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
         except Exception as e:
             logger.warning(f"Failed to load model config from {dir_model}: {e}")
             logger.warning("Trying to load config.json instead")
             with open(dir_model / "config.json", "r", encoding="utf-8") as f:
                 config = json.load(f)
-            if "llm_config" in config:
-                # rename for InternVL
-                config["text_config"] = config["llm_config"]
-            return config
+        if "llm_config" in config:
+            # rename for InternVL
+            config["text_config"] = config["llm_config"]
+        if "thinker_config" in config:
+            # rename for Qwen2.5-Omni
+            config["text_config"] = config["thinker_config"]["text_config"]
+        return config
 
     @classmethod
     def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
         assert names
 
         def func(modelcls: AnyModel) -> AnyModel:
-            model_type = ModelType.VISION if modelcls.model_arch == gguf.MODEL_ARCH.CLIP_VISION else ModelType.TEXT
+            model_type = ModelType.MMPROJ if modelcls.model_arch == gguf.MODEL_ARCH.MMPROJ else ModelType.TEXT
             for name in names:
                 cls._model_classes[model_type][name] = modelcls
             return modelcls
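
Note: the load_hparams change above stops returning early inside the try block so that nested configs can be normalized in one place: InternVL publishes its LLM settings under llm_config, and Qwen2.5-Omni nests them under thinker_config.text_config. A minimal sketch of that normalization, using hypothetical config dicts (the real ones come from AutoConfig or config.json):

    # hypothetical shapes of the two nested-config layouts handled above
    config_internvl = {"llm_config": {"hidden_size": 4096}}
    config_omni = {"thinker_config": {"text_config": {"hidden_size": 3584}}}

    def normalize(config: dict) -> dict:
        if "llm_config" in config:  # rename for InternVL
            config["text_config"] = config["llm_config"]
        if "thinker_config" in config:  # rename for Qwen2.5-Omni
            config["text_config"] = config["thinker_config"]["text_config"]
        return config

    assert normalize(config_omni)["text_config"]["hidden_size"] == 3584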
@@ -518,15 +523,15 @@ class TextModel(ModelBase):
         self.gguf_writer.add_context_length(n_ctx)
         logger.info(f"gguf: context length = {n_ctx}")
 
-        if (n_embd := self.find_hparam(["hidden_size", "n_embd"], optional=True)) is not None:
+        if (n_embd := self.find_hparam(["hidden_size", "n_embd", "dim"], optional=True)) is not None:
             self.gguf_writer.add_embedding_length(n_embd)
             logger.info(f"gguf: embedding length = {n_embd}")
 
-        if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None:
+        if (n_ff := self.find_hparam(["intermediate_size", "n_inner", "hidden_dim"], optional=True)) is not None:
             self.gguf_writer.add_feed_forward_length(n_ff)
             logger.info(f"gguf: feed forward length = {n_ff}")
 
-        if (n_head := self.find_hparam(["num_attention_heads", "n_head"], optional=True)) is not None:
+        if (n_head := self.find_hparam(["num_attention_heads", "n_head", "n_heads"], optional=True)) is not None:
             self.gguf_writer.add_head_count(n_head)
             logger.info(f"gguf: head count = {n_head}")
 
@@ -669,12 +674,12 @@ class TextModel(ModelBase):
         if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
             # ref: https://huggingface.co/tiiuae/falcon-7b
             res = "falcon"
-        if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
-            # ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
-            res = "falcon3"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
             res = "bert-bge"
+        if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
+            # ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
+            res = "falcon3"
         if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
             # ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
             res = "bert-bge-large"
@@ -726,9 +731,6 @@ class TextModel(ModelBase):
         if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
             # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
             res = "jina-v2-code"
-        if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" or chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
-            # ref: https://huggingface.co/THUDM/glm-4-9b-chat
-            res = "chatglm-bpe"
         if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
             # ref: https://huggingface.co/LumiOpen/Viking-7B
             res = "viking"
@@ -759,9 +761,6 @@ class TextModel(ModelBase):
         if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
             # ref: https://huggingface.co/facebook/chameleon-7b
             res = "chameleon"
-        if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
-            # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
-            res = "minerva-7b"
         if chkhsh == "8b5a93ed704057481f240da0be7e7dca721d7f8f4755263b6807227a2cbeae65":
             # ref: https://huggingface.co/sentence-transformers/stsb-roberta-base
             res = "roberta-bpe"
@@ -792,15 +791,24 @@ class TextModel(ModelBase):
         if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406":
             # ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
             res = "llama4"
-        if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
-            # ref: https://huggingface.co/THUDM/glm-4-9b-hf
-            res = "glm4"
         if chkhsh == "0e9433cbbb161f89e264eb32e8e64bfe69e834973ffca5d41d3948a604a3e2a3":
             # ref: https://huggingface.co/mistral-community/pixtral-12b
             res = "pixtral"
         if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec":
             # ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
             res = "seed-coder"
+        if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
+            # ref: https://huggingface.co/THUDM/glm-4-9b-chat
+            res = "chatglm-bpe"
+        if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
+            # ref: https://huggingface.co/THUDM/glm-4-9b-chat
+            res = "chatglm-bpe"
+        if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
+            # ref: https://huggingface.co/THUDM/glm-4-9b-hf
+            res = "glm4"
+        if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
+            # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
+            res = "minerva-7b"
 
         if res is None:
             logger.warning("\n")
@@ -1113,60 +1121,116 @@ class TextModel(ModelBase):
         self.gguf_writer.add_pooling_type(pooling_type)
 
 
-class VisionModel(ModelBase):
-    model_type = ModelType.VISION
-    model_arch = gguf.MODEL_ARCH.CLIP_VISION
+class MmprojModel(ModelBase):
+    model_type = ModelType.MMPROJ
+    model_arch = gguf.MODEL_ARCH.MMPROJ
     preprocessor_config: dict[str, Any]
     global_config: dict[str, Any]
 
+    n_block_keys = ["n_layers", "num_hidden_layers", "n_layer", "num_layers", "depth"]
+
+    has_vision_encoder: bool = True  # by default
+    has_audio_encoder: bool = False
+
+    # for models having multiple encoders, we need to separate their hparams
+    hparams_vision: dict[str, Any] | None = None
+    hparams_audio: dict[str, Any] | None = None
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-        if self.model_arch != gguf.MODEL_ARCH.CLIP_VISION:
-            raise TypeError("VisionModel must be subclassed with model_arch = gguf.MODEL_ARCH.CLIP_VISION")
+        if self.model_arch != gguf.MODEL_ARCH.MMPROJ:
+            raise TypeError("MmprojModel must be subclassed with model_arch = gguf.MODEL_ARCH.MMPROJ")
 
         # get n_embd of the text model
         if "text_config" not in self.hparams:
             self.hparams["text_config"] = {}
+        if "audio_config" not in self.hparams:
+            self.hparams["audio_config"] = {}
         text_config = {**self.hparams, **self.hparams["text_config"]}
         self.n_embd_text = text_config.get("hidden_size", text_config.get("n_embd", 0))
         assert self.n_embd_text > 0, "n_embd not found in hparams"
 
-        if "vision_config" not in self.hparams:
-            raise ValueError("vision_config not found in hparams")
         # move vision config to the top level, while preserving the original hparams in global_config
-        self.global_config = self.hparams
-        self.hparams = self.hparams["vision_config"]
+        import copy
+        self.global_config = copy.deepcopy(self.hparams)
+        self.hparams_vision = self.get_vision_config()
+        self.hparams_audio = self.get_audio_config()
+
+        if self.hparams_vision is None and self.hparams_audio is None:
+            raise ValueError("vision_config / audio_config not found in hparams")
+
+        # for compat with vision-only models
+        self.hparams = self.hparams_vision or self.hparams_audio or self.hparams
 
-        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers", "depth"])
-        self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CLIP_VISION, self.block_count)
+        # TODO @ngxson : this is a hack to support both vision and audio encoders
+        have_multiple_encoders = self.has_audio_encoder and self.has_vision_encoder
+        self.block_count = 128 if have_multiple_encoders else self.find_hparam(self.n_block_keys, True)
+        self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MMPROJ, self.block_count)
 
         # load preprocessor config
         with open(self.dir_model / "preprocessor_config.json", "r", encoding="utf-8") as f:
             self.preprocessor_config = json.load(f)
 
+    def get_vision_config(self) -> dict[str, Any] | None:
+        return self.global_config.get("vision_config")
+
+    def get_audio_config(self) -> dict[str, Any] | None:
+        return self.global_config.get("audio_config")
+
     def set_type(self):
-        self.gguf_writer.add_type(gguf.GGUFType.CLIP_VISION)
+        self.gguf_writer.add_type(gguf.GGUFType.MMPROJ)
 
     def set_gguf_parameters(self):
         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_vision_projection_dim(self.n_embd_text)
-        self.gguf_writer.add_vision_has_vision_encoder(True)
 
-        # vision config
-        self.gguf_writer.add_vision_image_size(self.find_hparam(["image_size"]))
-        self.gguf_writer.add_vision_patch_size(self.find_hparam(["patch_size"]))
-        self.gguf_writer.add_vision_embedding_length(self.find_hparam(["hidden_size"]))
-        self.gguf_writer.add_vision_feed_forward_length(self.find_hparam(["intermediate_size"]))
-        self.gguf_writer.add_vision_block_count(self.block_count)
-        self.gguf_writer.add_vision_head_count(self.find_hparam(["num_attention_heads"]))
+        if self.has_vision_encoder:
+            self.gguf_writer.add_clip_has_vision_encoder(True)
+            self.gguf_writer.add_vision_projection_dim(self.n_embd_text)
+
+            # vision config
+            self.gguf_writer.add_vision_image_size(self.find_vparam(["image_size"]))
+            self.gguf_writer.add_vision_patch_size(self.find_vparam(["patch_size"]))
+            self.gguf_writer.add_vision_embedding_length(self.find_vparam(["hidden_size"]))
+            self.gguf_writer.add_vision_feed_forward_length(self.find_vparam(["intermediate_size"]))
+            self.gguf_writer.add_vision_block_count(self.find_vparam(self.n_block_keys))
+            self.gguf_writer.add_vision_head_count(self.find_vparam(["num_attention_heads"]))
+
+            # preprocessor config
+            self.gguf_writer.add_vision_image_mean(self.preprocessor_config["image_mean"])
+            self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_std"])
+
+        if self.has_audio_encoder:
+            self.gguf_writer.add_clip_has_audio_encoder(True)
+            self.gguf_writer.add_audio_projection_dim(self.n_embd_text)
+
+            # audio config
+            self.gguf_writer.add_audio_embedding_length(self.find_aparam(["hidden_size"]))
+            self.gguf_writer.add_audio_feed_forward_length(self.find_aparam(["intermediate_size"]))
+            self.gguf_writer.add_audio_block_count(self.find_aparam(self.n_block_keys))
+            self.gguf_writer.add_audio_head_count(self.find_aparam(["num_attention_heads"]))
 
-        # preprocessor config
-        self.gguf_writer.add_vision_image_mean(self.preprocessor_config["image_mean"])
-        self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_std"])
+        if not self.has_vision_encoder and not self.has_audio_encoder:
+            raise ValueError("MmprojModel must have either vision or audio encoder")
 
     def write_vocab(self):
-        raise ValueError("VisionModel does not support vocab writing")
+        raise ValueError("MmprojModel does not support vocab writing")
+
+    def find_vparam(self, keys: Iterable[str], optional: bool = False) -> Any:
+        assert self.hparams_vision is not None
+        return self._find_param(self.hparams_vision, keys, optional)
+
+    def find_aparam(self, keys: Iterable[str], optional: bool = False) -> Any:
+        assert self.hparams_audio is not None
+        return self._find_param(self.hparams_audio, keys, optional)
+
+    def _find_param(self, obj: dict[str, Any], keys: Iterable[str], optional: bool = False) -> Any:
+        key = next((k for k in keys if k in obj), None)
+        if key is not None:
+            return obj[key]
+        if optional:
+            return None
+        raise KeyError(f"could not find any of: {keys}")
 
 
 @ModelBase.register("GPTNeoXForCausalLM")
@@ -1780,7 +1844,8 @@ class StableLMModel(TextModel):
     "MistralForCausalLM",
     "MixtralForCausalLM",
     "VLlama3ForCausalLM",
-    "LlavaForConditionalGeneration")
+    "LlavaForConditionalGeneration",
+    "LlamaModel")
 class LlamaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
     undo_permute = True
@@ -1860,6 +1925,8 @@ class LlamaModel(TextModel):
 
         if is_vision_tensor:
             return []  # skip vision tensors
+        elif self.hf_arch == "LlamaModel":
+            name = "model." + name
         elif name.startswith("model.text_model"):
             name = name.replace("text_model.", "")  # for SmolVLM
         elif name.startswith("language_model."):
@@ -1950,7 +2017,7 @@ class LlamaModel(TextModel):
     "LlavaForConditionalGeneration",  # pixtral
     "Mistral3ForConditionalGeneration",  # mistral small 3.1
 )
-class LlavaVisionModel(VisionModel):
+class LlavaVisionModel(MmprojModel):
     img_break_tok_id = -1
 
     def __init__(self, *args, **kwargs):
@@ -1976,7 +2043,7 @@ class LlavaVisionModel(VisionModel):
         super().set_gguf_parameters()
         hparams = self.hparams
         if hparams["model_type"] == "pixtral":
-            self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.PIXTRAL)
+            self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.PIXTRAL)
             self.gguf_writer.add_vision_attention_layernorm_eps(hparams["layer_norm_eps"])
 
             # hidden_act
@@ -2015,7 +2082,7 @@ class LlavaVisionModel(VisionModel):
 
 
 @ModelBase.register("Idefics3ForConditionalGeneration", "SmolVLMForConditionalGeneration")
-class SmolVLMModel(VisionModel):
+class SmolVLMModel(MmprojModel):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         if self.hparams["model_type"] == "smolvlm_vision":
@@ -2027,7 +2094,7 @@ class SmolVLMModel(VisionModel):
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.IDEFICS3)
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.IDEFICS3)
         self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-5))
         self.gguf_writer.add_vision_projector_scale_factor(self.global_config.get("scale_factor", 2))
         self.gguf_writer.add_vision_use_gelu(True)
@@ -2069,6 +2136,9 @@ class Llama4Model(LlamaModel):
         self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        if name.startswith("language_model."):
+            name = name.replace("language_model.", "")
+
         # split the gate_up into gate and up
         if "gate_up_proj" in name:
             name_up = name.replace("gate_up_proj", "up_proj.weight")
@@ -2089,6 +2159,29 @@ class Llama4Model(LlamaModel):
         return super().modify_tensors(data_torch, name, bid)
 
 
+@ModelBase.register("Llama4ForConditionalGeneration")
+class Llama4VisionModel(MmprojModel):
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.LLAMA4)
+        self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams["norm_eps"])
+        self.gguf_writer.add_vision_projector_scale_factor(int(1.0 / self.hparams["pixel_shuffle_ratio"]))
+        assert self.hparams["hidden_act"] == "gelu"
+        self.gguf_writer.add_vision_use_gelu(True)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+        if "multi_modal_projector" in name or "vision_model" in name:
+            # process vision tensors
+            if "positional_embedding_vlm" in name and ".weight" not in name:
+                name += ".weight"
+            if "multi_modal_projector.linear_1" in name:
+                # despite the name with number postfix, this is a single fully connected layer
+                return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC], data_torch)]
+            return [(self.map_tensor_name(name), data_torch)]
+        return []
+
+
 @ModelBase.register("Mistral3ForConditionalGeneration")
 class Mistral3Model(LlamaModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
@@ -2591,7 +2684,7 @@ class QwenModel(TextModel):
         self.gguf_writer.add_file_type(self.ftype)
 
 
-@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM")
+@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration")
 class Qwen2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.QWEN2
 
@@ -2615,13 +2708,19 @@ class Qwen2Model(TextModel):
             name = f"model.{name}"  # map to Qwen2ForCausalLM tensors
         if "language_model." in name:
             name = name.replace("language_model.", "")  # for InternVL
-        if name.startswith("mlp") or name.startswith("vision_model"):
-            # skip visual tensors
+        if name.startswith("mlp") or name.startswith("multi_modal_projector") \
+                or name.startswith("vision_model") or name.startswith("audio_tower"):
+            # skip vision and audio tensors
            return []
         yield from super().modify_tensors(data_torch, name, bid)
 
 
-@ModelBase.register("Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration")
+@ModelBase.register(
+    "Qwen2VLModel",
+    "Qwen2VLForConditionalGeneration",
+    "Qwen2_5_VLForConditionalGeneration",
+    "Qwen2_5OmniModel",
+)
 class Qwen2VLModel(TextModel):
     model_arch = gguf.MODEL_ARCH.QWEN2VL
 
@@ -2639,31 +2738,40 @@ class Qwen2VLModel(TextModel):
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
-        if name.startswith("visual."):
-            # skip visual tensors
+        if name.startswith("thinker."):
+            name = name.replace("thinker.", "")
+        if name.startswith("visual") or name.startswith("audio") or \
+                name.startswith("talker") or name.startswith("token2wav"):
+            # skip multimodal tensors
             return []
         return [(self.map_tensor_name(name), data_torch)]
 
 
-@ModelBase.register("Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration")
-class Qwen2VLVisionModel(VisionModel):
+@ModelBase.register("Qwen2VLModel", "Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration")
+class Qwen2VLVisionModel(MmprojModel):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.hparams["image_size"] = self.hparams.get("image_size", 560)
+        assert self.hparams_vision is not None
+        self.hparams_vision["image_size"] = self.hparams_vision.get("image_size", 560)
         # rename config.json values
-        self.hparams["num_attention_heads"] = self.hparams.get("num_heads")
-        self.hparams["num_hidden_layers"] = self.hparams.get("depth")
-        if "embed_dim" in self.hparams:  # qwen2vl
-            self.hparams["intermediate_size"] = self.hparams.get("hidden_size")
-            self.hparams["hidden_size"] = self.hparams.get("embed_dim")
+        self.hparams_vision["num_attention_heads"] = self.hparams_vision.get("num_heads")
+        self.hparams_vision["num_hidden_layers"] = self.hparams_vision.get("depth")
+        if "embed_dim" in self.hparams_vision:  # qwen2vl
+            self.hparams_vision["intermediate_size"] = self.hparams_vision.get("hidden_size")
+            self.hparams_vision["hidden_size"] = self.hparams_vision.get("embed_dim")
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        hparams = self.hparams
-        if self.global_config['model_type'] == 'qwen2_vl':
-            self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.QWEN2VL)
-        elif self.global_config['model_type'] == 'qwen2_5_vl':
-            self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.QWEN25VL)
+        assert self.hparams_vision is not None
+        hparams = self.hparams_vision
+        model_type = self.global_config['model_type']
+        if model_type == 'qwen2_vl':
+            self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN2VL)
+        elif model_type == 'qwen2_5_vl' or model_type == 'qwen2_5_omni':
+            if model_type == 'qwen2_5_omni':
+                self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN25O)
+            else:
+                self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN25VL)
             self.gguf_writer.add_vision_use_silu(True)
             # find n_wa_pattern (window attention pattern)
             fullatt_block_indexes = hparams.get("fullatt_block_indexes")
@@ -2721,12 +2829,72 @@ class Qwen2VLVisionModel(VisionModel):
         return []  # skip other tensors
 
 
+@ModelBase.register("Qwen2_5OmniModel")
+class Qwen25OmniModel(Qwen2VLVisionModel):
+    has_vision_encoder = True
+    has_audio_encoder = True
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert self.hparams_audio is not None
+        self.hparams_audio["hidden_size"] = self.hparams_audio["d_model"]
+        self.hparams_audio["intermediate_size"] = self.hparams_audio["encoder_ffn_dim"]
+        self.hparams_audio["num_attention_heads"] = self.hparams_audio["encoder_attention_heads"]
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        assert self.hparams_audio is not None
+        self.gguf_writer.add_audio_num_mel_bins(self.hparams_audio["num_mel_bins"])
+        self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams_audio.get("layer_norm_eps", 1e-5))
+
+    def get_vision_config(self) -> dict[str, Any] | None:
+        return self.global_config["thinker_config"].get("vision_config")
+
+    def get_audio_config(self) -> dict[str, Any] | None:
+        return self.global_config["thinker_config"].get("audio_config")
+
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        # SinusoidsPositionEmbedding
+        assert self.hparams_audio is not None
+        max_timescale = 10000
+        length = 1500
+        channels = self.hparams_audio["hidden_size"]
+        log_timescale_increment = np.log(max_timescale) / (channels // 2 - 1)
+        inv_timescales = torch.exp(-log_timescale_increment * torch.arange(channels // 2).float())
+        scaled_time = torch.arange(length)[:, np.newaxis] * inv_timescales[np.newaxis, :]
+        pos_embd = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1).to(dtype=torch.float32)
+        yield ("audio_tower.embed_positions.weight", pos_embd)
+
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
+        del bid, new_name, n_dims  # unused
+        if ".conv" in name and ".weight" in name:
+            return gguf.GGMLQuantizationType.F16
+        return False
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.startswith("thinker."):
+            name = name.replace("thinker.", "")
+
+        if name.startswith("audio_tower"):
+            # process audio tensors
+            if "conv1.bias" in name or "conv2.bias" in name:
+                # transpose conv1 and conv2 bias
+                data_torch = data_torch.unsqueeze(-1)
+            if "audio_bos_eos_token" in name:
+                # this tensor is left unused in transformers code
+                # https://github.com/huggingface/transformers/blob/6e3063422c4b1c014aa60c32b9254fd2902f0f28/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py#L1809
+                return []
+            return [(self.map_tensor_name(name), data_torch)]
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("InternVisionModel")
-class InternVisionModel(VisionModel):
+class InternVisionModel(MmprojModel):
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
-        self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.INTERNVL)
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.INTERNVL)
         self.gguf_writer.add_vision_attention_layernorm_eps(hparams["layer_norm_eps"])
         # hidden_act
         if hparams["hidden_act"] == "silu":
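
Note: generate_extra_tensors in the Qwen25OmniModel hunk above synthesizes the Whisper-style sinusoidal position table that the Qwen2.5-Omni audio tower expects but does not ship in its checkpoint. A NumPy-only sketch of the same construction (length 1500 and max_timescale 10000 are fixed in the diff; the channels default of 1280 here is an illustrative stand-in for hparams_audio["hidden_size"]):

    import numpy as np

    def sinusoids(length: int = 1500, channels: int = 1280, max_timescale: float = 10000.0) -> np.ndarray:
        # one timescale per channel pair: first half sin, second half cos
        log_inc = np.log(max_timescale) / (channels // 2 - 1)
        inv_timescales = np.exp(-log_inc * np.arange(channels // 2))
        scaled_time = np.arange(length)[:, np.newaxis] * inv_timescales[np.newaxis, :]
        return np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1).astype(np.float32)

    assert sinusoids().shape == (1500, 1280)  # (positions, hidden_size)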
@@ -3517,7 +3685,7 @@ class InternLM3Model(TextModel):
         return [(self.map_tensor_name(name), data_torch)]
 
 
-@ModelBase.register("BertModel", "BertForMaskedLM", "CamembertModel")
+@ModelBase.register("BertModel", "BertForMaskedLM", "CamembertModel", "BertForSequenceClassification")
 class BertModel(TextModel):
     model_arch = gguf.MODEL_ARCH.BERT
 
@@ -3525,11 +3693,21 @@ class BertModel(TextModel):
         super().__init__(*args, **kwargs)
         self.vocab_size = None
 
+        if cls_out_labels := self.hparams.get("id2label"):
+            if len(cls_out_labels) == 2 and cls_out_labels[0] == "LABEL_0":
+                # Remove dummy labels added by AutoConfig
+                cls_out_labels = None
+        self.cls_out_labels = cls_out_labels
+
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         self.gguf_writer.add_causal_attention(False)
         self._try_set_pooling_type()
 
+        if self.cls_out_labels:
+            key_name = gguf.Keys.Classifier.OUTPUT_LABELS.format(arch = gguf.MODEL_ARCH_NAMES[self.model_arch])
+            self.gguf_writer.add_array(key_name, [v for k, v in sorted(self.cls_out_labels.items())])
+
     def set_vocab(self):
         tokens, toktypes, tokpre = self.get_vocab_base()
         self.vocab_size = len(tokens)
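
Note: the BertModel changes above export the classifier's output labels into GGUF metadata via gguf.Keys.Classifier.OUTPUT_LABELS. The labels are written sorted by class id, so array index i corresponds to class id i; a small sketch with a hypothetical two-class id2label (as AutoConfig parses it from config.json, with integer keys):

    cls_out_labels = {1: "positive", 0: "negative"}  # hypothetical id2label
    labels = [v for _, v in sorted(cls_out_labels.items())]
    assert labels == ["negative", "positive"]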
@@ -3580,6 +3758,14 @@ class BertModel(TextModel):
         if name.startswith("cls.seq_relationship"):
             return []
 
+        if self.cls_out_labels:
+            # For BertForSequenceClassification (direct projection layer)
+            if name == "classifier.weight":
+                name = "classifier.out_proj.weight"
+
+            if name == "classifier.bias":
+                name = "classifier.out_proj.bias"
+
         return [(self.map_tensor_name(name), data_torch)]
 
     def _xlmroberta_tokenizer_init(self) -> None:
@@ -3599,44 +3785,93 @@ class BertModel(TextModel):
         from sentencepiece import sentencepiece_model_pb2 as model
 
         tokenizer_path = self.dir_model / 'sentencepiece.bpe.model'
+
+        tokenizer_json = {}
+        tokenizer_config_json = {}
         if not tokenizer_path.is_file():
-            raise FileNotFoundError(f"File not found: {tokenizer_path}")
+            tokenizer_path = self.dir_model / 'tokenizer.json'
+            tokenizer_config_path = self.dir_model / 'tokenizer_config.json'
 
-        sentencepiece_model = model.ModelProto()  # pyright: ignore[reportAttributeAccessIssue]
-        sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
-        assert sentencepiece_model.trainer_spec.model_type == 1  # UNIGRAM
+            if not tokenizer_path.is_file():
+                raise FileNotFoundError(f"File not found: {tokenizer_path}")
 
-        add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
-        remove_whitespaces = sentencepiece_model.normalizer_spec.remove_extra_whitespaces
-        precompiled_charsmap = sentencepiece_model.normalizer_spec.precompiled_charsmap
+            from base64 import b64decode
+            from transformers import AutoTokenizer
+            tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
 
-        tokenizer = SentencePieceProcessor()
-        tokenizer.LoadFromFile(str(tokenizer_path))
+            with open(tokenizer_path, "r", encoding="utf-8") as fp:
+                tokenizer_json = json.load(fp)
 
-        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+            if tokenizer_config_path.is_file():
+                with open(tokenizer_config_path, "r", encoding="utf-8") as fp:
+                    tokenizer_config_json = json.load(fp)
+
+            add_prefix = tokenizer.add_prefix_space
+            remove_whitespaces = tokenizer.clean_up_tokenization_spaces
+            precompiled_charsmap = b64decode(tokenizer_json["normalizer"]["precompiled_charsmap"])
+
+            vocab_size = self.hparams.get("vocab_size", tokenizer.vocab_size)
+        else:
+            sentencepiece_model = model.ModelProto()  # pyright: ignore[reportAttributeAccessIssue]
+            sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
+            assert sentencepiece_model.trainer_spec.model_type == 1  # UNIGRAM
+
+            add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
+            remove_whitespaces = sentencepiece_model.normalizer_spec.remove_extra_whitespaces
+            precompiled_charsmap = sentencepiece_model.normalizer_spec.precompiled_charsmap
+
+            tokenizer = SentencePieceProcessor()
+            tokenizer.LoadFromFile(str(tokenizer_path))
+
+            vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
 
         tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
         scores: list[float] = [-10000.0] * vocab_size
         toktypes: list[int] = [SentencePieceTokenTypes.UNUSED] * vocab_size
 
-        for token_id in range(tokenizer.vocab_size()):
-            piece = tokenizer.IdToPiece(token_id)
-            text = piece.encode("utf-8")
-            score = tokenizer.GetScore(token_id)
+        if isinstance(tokenizer, SentencePieceProcessor):
+            for token_id in range(tokenizer.vocab_size()):
+                piece = tokenizer.IdToPiece(token_id)
+                text = piece.encode("utf-8")
+                score = tokenizer.GetScore(token_id)
 
-            toktype = SentencePieceTokenTypes.NORMAL
-            if tokenizer.IsUnknown(token_id):
-                toktype = SentencePieceTokenTypes.UNKNOWN
-            elif tokenizer.IsControl(token_id):
-                toktype = SentencePieceTokenTypes.CONTROL
-            elif tokenizer.IsUnused(token_id):
-                toktype = SentencePieceTokenTypes.UNUSED
-            elif tokenizer.IsByte(token_id):
-                toktype = SentencePieceTokenTypes.BYTE
+                toktype = SentencePieceTokenTypes.NORMAL
+                if tokenizer.IsUnknown(token_id):
+                    toktype = SentencePieceTokenTypes.UNKNOWN
+                elif tokenizer.IsControl(token_id):
+                    toktype = SentencePieceTokenTypes.CONTROL
+                elif tokenizer.IsUnused(token_id):
+                    toktype = SentencePieceTokenTypes.UNUSED
+                elif tokenizer.IsByte(token_id):
+                    toktype = SentencePieceTokenTypes.BYTE
 
-            tokens[token_id] = text
-            scores[token_id] = score
-            toktypes[token_id] = toktype
+                tokens[token_id] = text
+                scores[token_id] = score
+                toktypes[token_id] = toktype
+        else:
+            added_vocab = tokenizer.get_added_vocab()
+            unk_token = tokenizer_config_json.get("unk_token")
+            unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))
+
+            for token_id in range(vocab_size):
+                piece = tokenizer._convert_id_to_token(token_id)
+                text = piece.encode("utf-8")
+                score = tokenizer_json["model"]["vocab"][token_id][1]
+
+                toktype = SentencePieceTokenTypes.NORMAL
+                if token_id == unk_token_id:
+                    toktype = SentencePieceTokenTypes.UNKNOWN
+                elif token_id in tokenizer.all_special_ids:
+                    toktype = SentencePieceTokenTypes.CONTROL
+                elif token_id in added_vocab.values():
+                    toktype = SentencePieceTokenTypes.USER_DEFINED
+                # No reliable way to detect this, but jina doesn't have any
+                # elif tokenizer.IsByte(token_id):
+                #     toktype = SentencePieceTokenTypes.BYTE
+
+                tokens[token_id] = text
+                scores[token_id] = score
+                toktypes[token_id] = toktype
 
         if vocab_size > len(tokens):
             pad_count = vocab_size - len(tokens)
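
Note: with the branch added above, _xlmroberta_set_vocab now also accepts models that ship only a Hugging Face tokenizer.json instead of sentencepiece.bpe.model. A sketch of the file-based dispatch it performs (paths mirror the diff; the helper name is illustrative, not part of the package):

    from pathlib import Path

    def pick_tokenizer(dir_model: Path) -> str:
        # prefer the SentencePiece model, fall back to HF tokenizer.json
        if (dir_model / "sentencepiece.bpe.model").is_file():
            return "sentencepiece"
        if (dir_model / "tokenizer.json").is_file():
            return "hf-tokenizer-json"
        raise FileNotFoundError(f"File not found: {dir_model / 'tokenizer.json'}")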
@@ -3646,15 +3881,16 @@ class BertModel(TextModel):
3646
3881
  scores.append(-1000.0)
3647
3882
  toktypes.append(SentencePieceTokenTypes.UNUSED)
3648
3883
 
3649
- # realign tokens (see HF tokenizer code)
3650
- tokens = [b'<s>', b'<pad>', b'</s>', b'<unk>'] + tokens[3:-1]
3651
- scores = [0.0, 0.0, 0.0, 0.0] + scores[3:-1]
3652
- toktypes = [
3653
- SentencePieceTokenTypes.CONTROL,
3654
- SentencePieceTokenTypes.CONTROL,
3655
- SentencePieceTokenTypes.CONTROL,
3656
- SentencePieceTokenTypes.UNKNOWN,
3657
- ] + toktypes[3:-1]
3884
+ if isinstance(tokenizer, SentencePieceProcessor):
3885
+ # realign tokens (see HF tokenizer code)
3886
+ tokens = [b'<s>', b'<pad>', b'</s>', b'<unk>'] + tokens[3:-1]
3887
+ scores = [0.0, 0.0, 0.0, 0.0] + scores[3:-1]
3888
+ toktypes = [
3889
+ SentencePieceTokenTypes.CONTROL,
3890
+ SentencePieceTokenTypes.CONTROL,
3891
+ SentencePieceTokenTypes.CONTROL,
3892
+ SentencePieceTokenTypes.UNKNOWN,
3893
+ ] + toktypes[3:-1]
3658
3894
 
3659
3895
  self.gguf_writer.add_tokenizer_model("t5")
3660
3896
  self.gguf_writer.add_tokenizer_pre("default")
@@ -3674,7 +3910,27 @@ class BertModel(TextModel):
         self.gguf_writer.add_add_eos_token(True)
 
 
-@ModelBase.register("RobertaModel")
+@ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification")
+class DistilBertModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
+    def set_gguf_parameters(self):
+        self.gguf_writer.add_layer_norm_eps(1e-12)
+        logger.info("gguf: layer norm epsilon = 1e-12")
+        super().set_gguf_parameters()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.startswith("distilbert."):
+            name = name[11:]
+
+        # These layers act as MLM head, so we don't need them
+        if name.startswith("vocab_"):
+            return []
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
+@ModelBase.register("RobertaModel", "RobertaForSequenceClassification")
 class RobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
 
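The `name[11:]` slice in the new DistilBertModel drops the `"distilbert."` prefix (11 characters); a quick sketch on made-up checkpoint tensor names:

```python
# Standalone sketch of the DistilBert tensor-name handling above.
# len("distilbert.") == 11, hence name[11:].
for name in [
    "distilbert.transformer.layer.0.attention.q_lin.weight",
    "vocab_transform.weight",   # MLM head: dropped
    "vocab_projector.bias",     # MLM head: dropped
]:
    if name.startswith("distilbert."):
        name = name[11:]
    if name.startswith("vocab_"):
        continue  # the converter returns [] for these
    print(name)   # transformer.layer.0.attention.q_lin.weight
```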
@@ -3984,11 +4240,11 @@ class Gemma3Model(TextModel):
 
 
 @ModelBase.register("Gemma3ForConditionalGeneration")
-class Gemma3VisionModel(VisionModel):
+class Gemma3VisionModel(MmprojModel):
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
-        self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.GEMMA3)
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.GEMMA3)
         # default values below are taken from HF tranformers code
         self.gguf_writer.add_vision_attention_layernorm_eps(hparams.get("layer_norm_eps", 1e-6))
         self.gguf_writer.add_vision_use_gelu(True)
@@ -5746,11 +6002,20 @@ class GraniteModel(LlamaModel):
         logger.info("gguf: (granite) logits_scale = %s", logits_scale)
 
 
-@ModelBase.register("GraniteMoeForCausalLM")
+@ModelBase.register("GraniteMoeForCausalLM", "GraniteMoeSharedForCausalLM")
 class GraniteMoeModel(GraniteModel):
     """Conversion for IBM's GraniteMoeForCausalLM"""
     model_arch = gguf.MODEL_ARCH.GRANITE_MOE
 
+    def set_gguf_parameters(self):
+        """GraniteMoeShared uses GraniteMoe parameters plus the following:
+        - shared_intermediate_size
+        """
+        super().set_gguf_parameters()
+        if shared_feed_forward_length := self.hparams.get("shared_intermediate_size"):
+            self.gguf_writer.add_expert_shared_feed_forward_length(shared_feed_forward_length)
+            logger.info("gguf: (granitemoeshared) shared_feed_forward_length = %s", shared_feed_forward_length)
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         """In modeling_granitemoe, the JetMoe implementation of parallel experts
         is used. This essentially merges w1 and w3 into a single tensor with 2x
@@ -5761,12 +6026,21 @@ class GraniteMoeModel(GraniteModel):
         if name.endswith("block_sparse_moe.input_linear.weight"):
             ffn_dim = self.hparams["intermediate_size"]
             assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * intermediate_size"
-            gate, up = data_torch[..., :ffn_dim, :], data_torch[..., ffn_dim:, :]
+            gate, up = data_torch.split(ffn_dim, dim=-2)
             return [
                 (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_EXP, bid), gate),
                 (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), up),
             ]
 
+        if name.endswith("shared_mlp.input_linear.weight"):
+            ffn_dim = self.hparams["shared_intermediate_size"]
+            assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * shared_intermediate_size"
+            gate, up = data_torch.split(ffn_dim, dim=-2)
+            return [
+                (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), gate),
+                (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), up),
+            ]
+
         return super().modify_tensors(data_torch, name, bid)
 
 
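The change from manual slicing to `split()` in both branches is behavior-preserving; a minimal check with made-up shapes (2 experts, merged gate/up rows, hidden size 4):

```python
import torch

# Toy check that torch.split(ffn_dim, dim=-2) matches the old manual slicing.
ffn_dim = 3
data_torch = torch.randn(2, 2 * ffn_dim, 4)  # merged w1/w3: 2 * ffn_dim rows

gate_old, up_old = data_torch[..., :ffn_dim, :], data_torch[..., ffn_dim:, :]
gate_new, up_new = data_torch.split(ffn_dim, dim=-2)

assert torch.equal(gate_old, gate_new) and torch.equal(up_old, up_new)
print(gate_new.shape, up_new.shape)  # torch.Size([2, 3, 4]) torch.Size([2, 3, 4])
```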
@@ -5917,6 +6191,65 @@ class ChameleonModel(TextModel):
         return data_torch
 
 
+@ModelBase.register("UltravoxModel")
+class UltravoxModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.LLAMA  # dummy
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        raise NotImplementedError("Ultravox does not have text decoder. Instead, it uses Llama or other models for text. If you want to get the audio encoder, please use --mmproj argument")
+
+
+@ModelBase.register("Qwen2AudioForConditionalGeneration")
+class WhisperEncoderModel(MmprojModel):
+    has_vision_encoder = False  # no vision encoder
+    has_audio_encoder = True
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.hparams["hidden_size"] = self.hparams["d_model"]
+        self.hparams["intermediate_size"] = self.hparams["encoder_ffn_dim"]
+        self.hparams["num_attention_heads"] = self.hparams["encoder_attention_heads"]
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN2A)
+        self.gguf_writer.add_audio_num_mel_bins(self.hparams["num_mel_bins"])
+        self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-5))
+
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
+        del bid, new_name, n_dims  # unused
+        if ".conv" in name and ".weight" in name:
+            return gguf.GGMLQuantizationType.F16
+        return False
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        if name.startswith("language_model."):
+            # skip language model tensors
+            return []
+
+        # prevent clash naming with vision tensors
+        if name.startswith("multi_modal_projector"):
+            name = "audio." + name
+
+        if "conv1.bias" in name or "conv2.bias" in name:
+            # transpose conv1 and conv2 bias
+            data_torch = data_torch.unsqueeze(-1)
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+
+@ModelBase.register("UltravoxModel")
+class UltravoxWhisperEncoderModel(WhisperEncoderModel):
+    has_vision_encoder = False  # no vision encoder
+    has_audio_encoder = True
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_audio_stack_factor(self.global_config["stack_factor"])
+
 ###### CONVERSION LOGIC ######
 
 
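The bias `unsqueeze(-1)` in `modify_tensors` above is a pure shape change; a sketch with an illustrative channel count (1280 is an assumption for illustration, not read from the diff):

```python
import torch

# Whisper checkpoints store the conv1/conv2 biases as 1-D (out_channels,)
# tensors; adding a trailing singleton dim gives the 2-D layout the comment
# above calls "transposed". The channel count below is made up.
bias = torch.zeros(1280)     # checkpoint shape: (out_channels,)
bias = bias.unsqueeze(-1)    # stored shape:     (out_channels, 1)
print(bias.shape)            # torch.Size([1280, 1])
```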
@@ -6092,13 +6425,15 @@ def split_str_to_n_bytes(split_str: str) -> int:
 
 
 def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> str:
+    # TODO @ngxson : this won't work correctly if the model has both audio & vision encoders
+    # maybe we should fallback to text model's arch in that case, since not many models have both
     text_config = hparams.get("text_config", {})
     vision_config = hparams.get("vision_config", {})
     arch = hparams["architectures"][0]
     # if "architectures" is found in the sub-config, use that instead
     if model_type == ModelType.TEXT and text_config.get("architectures") is not None:
         arch = text_config["architectures"][0]
-    elif model_type == ModelType.VISION and vision_config.get("architectures") is not None:
+    elif model_type == ModelType.MMPROJ and vision_config.get("architectures") is not None:
         arch = vision_config["architectures"][0]
     return arch
 
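The resolution order in `get_model_architecture` is simple once spelled out; a standalone sketch with plain strings standing in for the `ModelType` enum and a made-up multimodal config (the nested architecture names are hypothetical):

```python
# Illustrative re-implementation of the lookup above, on fake config data.
from typing import Any

def resolve_arch(hparams: dict[str, Any], model_type: str) -> str:
    text_config = hparams.get("text_config", {})
    vision_config = hparams.get("vision_config", {})
    arch = hparams["architectures"][0]
    # a sub-config's "architectures" overrides the top-level one
    if model_type == "text" and text_config.get("architectures") is not None:
        arch = text_config["architectures"][0]
    elif model_type == "mmproj" and vision_config.get("architectures") is not None:
        arch = vision_config["architectures"][0]
    return arch

hparams = {
    "architectures": ["SomeForConditionalGeneration"],
    "text_config": {"architectures": ["SomeForCausalLM"]},
    "vision_config": {},  # no nested arch: mmproj keeps the top-level one
}
print(resolve_arch(hparams, "text"))    # SomeForCausalLM
print(resolve_arch(hparams, "mmproj"))  # SomeForConditionalGeneration
```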
@@ -6161,7 +6496,7 @@ def main() -> None:
 
     with torch.inference_mode():
         output_type = ftype_map[args.outtype]
-        model_type = ModelType.VISION if args.mmproj else ModelType.TEXT
+        model_type = ModelType.MMPROJ if args.mmproj else ModelType.TEXT
         hparams = ModelBase.load_hparams(dir_model)
         model_architecture = get_model_architecture(hparams, model_type)
         logger.info(f"Model architecture: {model_architecture}")