@novastera-oss/llamarn 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/build-info.cpp +2 -2
  14. package/cpp/llama.cpp/README.md +11 -3
  15. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  16. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  17. package/cpp/llama.cpp/common/arg.cpp +153 -113
  18. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  19. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  20. package/cpp/llama.cpp/common/chat.cpp +847 -699
  21. package/cpp/llama.cpp/common/chat.h +73 -6
  22. package/cpp/llama.cpp/common/common.cpp +50 -82
  23. package/cpp/llama.cpp/common/common.h +21 -17
  24. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  25. package/cpp/llama.cpp/common/json-partial.h +37 -0
  26. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  27. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  28. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  29. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  30. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  31. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  32. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  33. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  34. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  35. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  36. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  37. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  56. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  57. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  73. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  74. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  75. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  86. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  87. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  110. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  111. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  120. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  121. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  122. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  123. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  124. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  125. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  126. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  127. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  128. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  129. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  130. package/cpp/llama.cpp/include/llama.h +62 -125
  131. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  132. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  133. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  134. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  135. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  150. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  152. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  154. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  159. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  160. package/cpp/llama.cpp/models/templates/README.md +2 -0
  161. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  162. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  163. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  164. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  165. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  166. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  167. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  168. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  169. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  170. package/cpp/llama.cpp/src/llama-context.h +30 -0
  171. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  172. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  173. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  174. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  175. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  176. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  177. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  178. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  179. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  180. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  181. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  182. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  183. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  184. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  185. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  186. package/cpp/llama.cpp/src/llama-model.h +6 -1
  187. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  188. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  189. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  190. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  191. package/cpp/llama.cpp/src/llama.cpp +14 -0
  192. package/cpp/rn-completion.cpp +4 -2
  193. package/ios/include/chat.h +73 -6
  194. package/ios/include/common/minja/chat-template.hpp +9 -5
  195. package/ios/include/common/minja/minja.hpp +69 -36
  196. package/ios/include/common.h +21 -17
  197. package/ios/include/llama.h +62 -125
  198. package/ios/libs/llama.xcframework/Info.plist +19 -19
  199. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  200. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  201. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  202. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  203. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  204. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  205. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  206. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  207. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  208. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  227. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  228. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  229. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  231. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  232. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  233. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  234. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  235. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  236. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  240. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  241. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  242. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  243. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  244. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  245. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  246. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  247. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  248. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  249. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  253. package/package.json +1 -1
  254. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  255. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  256. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  257. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  258. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  259. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  260. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  261. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  262. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  263. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py

@@ -68,7 +68,7 @@ class TensorNameMap:
             "output_layer", # chatglm
             "head", # rwkv
             "head.out", # wavtokenizer
-            "language_model.lm_head", # llama4
+            "lm_head", # llama4
         ),

         # Output norm
@@ -91,7 +91,7 @@ class TensorNameMap:
             "rwkv.ln_out", # rwkv6
             "model.ln_out", # rwkv7
             "backbone.final_layer_norm", # wavtokenizer
-            "language_model.model.norm", # llama4
+            "model.norm", # llama4
         ),

         # Rope frequencies
@@ -133,7 +133,7 @@ class TensorNameMap:
             "transformer.layers.{bid}.attn_norm", # openelm
             "rwkv.blocks.{bid}.ln1", # rwkv6
             "model.layers.{bid}.ln1", # rwkv7
-            "language_model.model.layers.{bid}.input_layernorm", # llama4
+            "model.layers.{bid}.input_layernorm", # llama4
         ),

         # Attention norm 2
@@ -157,6 +157,7 @@ class TensorNameMap:
             "h.{bid}.attn.c_attn", # gpt2
             "transformer.h.{bid}.mixer.Wqkv", # phi2
             "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
+            "encoder.layers.{bid}.mixer.Wqkv", # jina
             "model.layers.{bid}.self_attn.qkv_proj", # phi3
             "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
             "transformer.layers.{bid}.attn.qkv_proj", # openelm
@@ -168,12 +169,13 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
             "layers.{bid}.attention.wq", # llama-pth
             "encoder.layer.{bid}.attention.self.query", # bert
+            "transformer.layer.{bid}.attention.q_lin", # distillbert
             "transformer.h.{bid}.attn.q_proj", # gpt-j
             "model.layers.layers.{bid}.self_attn.q_proj", # plamo
             "model.layers.{bid}.attention.wq", # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
             "transformer.h.{bid}.attn.attention.q_proj", # exaone
-            "language_model.model.layers.{bid}.self_attn.q_proj", # llama4
+            "model.layers.{bid}.self_attn.q_proj", # llama4
         ),

         # Attention key
@@ -182,13 +184,14 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
             "layers.{bid}.attention.wk", # llama-pth
             "encoder.layer.{bid}.attention.self.key", # bert
+            "transformer.layer.{bid}.attention.k_lin", # distillbert
             "transformer.h.{bid}.attn.k_proj", # gpt-j
             "transformer.h.{bid}.attn.k", # refact
             "model.layers.layers.{bid}.self_attn.k_proj", # plamo
             "model.layers.{bid}.attention.wk", # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
             "transformer.h.{bid}.attn.attention.k_proj", # exaone
-            "language_model.model.layers.{bid}.self_attn.k_proj", # llama4
+            "model.layers.{bid}.self_attn.k_proj", # llama4
         ),

         # Attention value
@@ -196,13 +199,14 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2 phimoe
             "layers.{bid}.attention.wv", # llama-pth
             "encoder.layer.{bid}.attention.self.value", # bert
+            "transformer.layer.{bid}.attention.v_lin", # distillbert
             "transformer.h.{bid}.attn.v_proj", # gpt-j
             "transformer.h.{bid}.attn.v", # refact
             "model.layers.layers.{bid}.self_attn.v_proj", # plamo
             "model.layers.{bid}.attention.wv", # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
             "transformer.h.{bid}.attn.attention.v_proj", # exaone
-            "language_model.model.layers.{bid}.self_attn.v_proj", # llama4
+            "model.layers.{bid}.self_attn.v_proj", # llama4
         ),

         # Attention output
@@ -216,6 +220,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.linear_attn", # deci
             "layers.{bid}.attention.wo", # llama-pth
             "encoder.layer.{bid}.attention.output.dense", # bert
+            "transformer.layer.{bid}.attention.out_lin", # distillbert
             "transformer.h.{bid}.attn.out_proj", # gpt-j
             "language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
             "model.layers.{bid}.self_attn.dense", # persimmon
@@ -224,17 +229,19 @@ class TensorNameMap:
             "model.layers.layers.{bid}.self_attn.o_proj", # plamo
             "model.layers.{bid}.attention.wo", # internlm2
             "encoder.layers.{bid}.attn.out_proj", # nomic-bert
+            "encoder.layers.{bid}.mixer.out_proj", # jina
             "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
             "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
             "encoder.layers.{bid}.self_attention.dense", # chatglm
             "transformer.layers.{bid}.attn.out_proj", # openelm
             "transformer.h.{bid}.attn.attention.out_proj", # exaone
-            "language_model.model.layers.{bid}.self_attn.o_proj", # llama4
+            "model.layers.{bid}.self_attn.o_proj", # llama4
         ),

         # Attention output norm
         MODEL_TENSOR.ATTN_OUT_NORM: (
             "encoder.layer.{bid}.attention.output.LayerNorm", # bert
+            "transformer.layer.{bid}.sa_layer_norm", # distillbert
             "encoder.layers.{bid}.norm1", # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_1", # Grok
             "transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
@@ -268,7 +275,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
             "encoder.layers.{bid}.post_attention_layernorm", # chatglm
             "transformer.layers.{bid}.ffn_norm", # openelm
-            "language_model.model.layers.{bid}.post_attention_layernorm", # llama4
+            "model.layers.{bid}.post_attention_layernorm", # llama4
         ),

         # Post feed-forward norm
@@ -289,7 +296,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.router", # Grok
             "transformer.blocks.{bid}.ffn.router.layer", # dbrx
             "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
-            "language_model.model.layers.{bid}.feed_forward.router", # llama4
+            "model.layers.{bid}.feed_forward.router", # llama4
             "encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe
         ),

@@ -311,6 +318,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
             "layers.{bid}.feed_forward.w3", # llama-pth
             "encoder.layer.{bid}.intermediate.dense", # bert
+            "transformer.layer.{bid}.ffn.lin1", # distillbert
             "transformer.h.{bid}.mlp.fc_in", # gpt-j
             "transformer.h.{bid}.mlp.linear_3", # refact
             "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
@@ -329,7 +337,7 @@ class TensorNameMap:
             "model.layers.{bid}.residual_mlp.w3", # arctic
             "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
             "transformer.h.{bid}.mlp.c_fc_1", # exaone
-            "language_model.model.layers.{bid}.feed_forward.up_proj", # llama4
+            "model.layers.{bid}.feed_forward.up_proj", # llama4
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
@@ -338,14 +346,14 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
             "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
             "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
-            "language_model.model.layers.{bid}.feed_forward.experts.up_proj", # llama4
+            "model.layers.{bid}.feed_forward.experts.up_proj", # llama4
             "encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe
         ),

         MODEL_TENSOR.FFN_UP_SHEXP: (
-            "model.layers.{bid}.mlp.shared_expert.up_proj",                         # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.up_proj",                        # deepseek deepseek2
-            "language_model.model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
+            "model.layers.{bid}.mlp.shared_expert.up_proj",          # qwen2moe
+            "model.layers.{bid}.mlp.shared_experts.up_proj",         # deepseek deepseek2
+            "model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
         ),

         # AWQ-activation gate
@@ -366,22 +374,22 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.linear_1", # refact
             "model.layers.{bid}.residual_mlp.w1", # arctic
             "transformer.h.{bid}.mlp.c_fc_0", # exaone
-            "language_model.model.layers.{bid}.feed_forward.gate_proj", # llama4
+            "model.layers.{bid}.feed_forward.gate_proj", # llama4
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
-            "layers.{bid}.feed_forward.experts.w1",                             # mixtral (merged)
-            "transformer.decoder_layer.{bid}.moe.linear",                       # Grok (merged)
-            "transformer.blocks.{bid}.ffn.experts.mlp.w1",                      # dbrx
-            "model.layers.{bid}.mlp.experts.gate_proj",                         # qwen2moe olmoe (merged)
-            "model.layers.{bid}.block_sparse_moe.experts.w1",                   # phimoe (merged)
-            "language_model.model.layers.{bid}.feed_forward.experts.gate_proj", # llama4
+            "layers.{bid}.feed_forward.experts.w1",              # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear",        # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.w1",       # dbrx
+            "model.layers.{bid}.mlp.experts.gate_proj",          # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.experts.w1",    # phimoe (merged)
+            "model.layers.{bid}.feed_forward.experts.gate_proj", # llama4
         ),

         MODEL_TENSOR.FFN_GATE_SHEXP: (
-            "model.layers.{bid}.mlp.shared_expert.gate_proj",                         # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.gate_proj",                        # deepseek deepseek2
-            "language_model.model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
+            "model.layers.{bid}.mlp.shared_expert.gate_proj",          # qwen2moe
+            "model.layers.{bid}.mlp.shared_experts.gate_proj",         # deepseek deepseek2
+            "model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
         ),

         # Feed-forward down
@@ -394,6 +402,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
             "layers.{bid}.feed_forward.w2", # llama-pth
             "encoder.layer.{bid}.output.dense", # bert
+            "transformer.layer.{bid}.ffn.lin2", # distillbert
             "transformer.h.{bid}.mlp.fc_out", # gpt-j
             "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
             "model.layers.{bid}.mlp.dense_4h_to_h", # persimmon
@@ -410,7 +419,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
             "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
             "model.layers.h.{bid}.mlp.c_proj", # exaone
-            "language_model.model.layers.{bid}.feed_forward.down_proj", # llama4
+            "model.layers.{bid}.feed_forward.down_proj", # llama4
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -420,14 +429,15 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
             "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
             "model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
-            "language_model.model.layers.{bid}.feed_forward.experts.down_proj", # llama4
+            "model.layers.{bid}.feed_forward.experts.down_proj", # llama4
             "encoder.layers.{bid}.mlp.experts.mlp.w2", # nomic-bert-moe
         ),

         MODEL_TENSOR.FFN_DOWN_SHEXP: (
-            "model.layers.{bid}.mlp.shared_expert.down_proj",                         # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.down_proj",                        # deepseek deepseek2
-            "language_model.model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
+            "model.layers.{bid}.mlp.shared_expert.down_proj",          # qwen2moe
+            "model.layers.{bid}.mlp.shared_experts.down_proj",         # deepseek deepseek2
+            "model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
+            "model.layers.{bid}.shared_mlp.output_linear",             # granitemoe
         ),

         MODEL_TENSOR.ATTN_Q_NORM: (
@@ -454,6 +464,7 @@ class TensorNameMap:

         MODEL_TENSOR.LAYER_OUT_NORM: (
             "encoder.layer.{bid}.output.LayerNorm", # bert
+            "transformer.layer.{bid}.output_layer_norm", # distillbert
             "encoder.layers.{bid}.norm2", # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
             "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
@@ -824,6 +835,7 @@ class TensorNameMap:
         MODEL_TENSOR.CLS: (
             "classifier", # jina
             "classifier.dense", # roberta
+            "pre_classifier", # distillbert
         ),

         MODEL_TENSOR.CLS_OUT: (
@@ -905,6 +917,7 @@ class TensorNameMap:

         MODEL_TENSOR.V_MMPROJ_MLP: (
             "model.mm_projector.mlp.mlp.{bid}",
+            "vision_model.vision_adapter.mlp.fc{bid}", # llama 4
             "mlp1.{bid}", # InternVL
         ),

@@ -914,6 +927,7 @@ class TensorNameMap:

         MODEL_TENSOR.V_ENC_EMBD_CLS: (
             "vision_tower.vision_model.embeddings.class_embedding",
+            "vision_model.class_embedding", # llama 4
         ),

         MODEL_TENSOR.V_ENC_EMBD_PATCH: (
@@ -921,6 +935,7 @@ class TensorNameMap:
             "vpm.embeddings.patch_embedding",
             "model.vision_model.embeddings.patch_embedding", # SmolVLM
             "vision_tower.patch_conv", # pixtral
+            "vision_model.patch_embedding.linear", # llama 4
             "visual.patch_embed.proj", # qwen2vl
         ),

@@ -928,12 +943,14 @@ class TensorNameMap:
             "vision_tower.vision_model.embeddings.position_embedding",
             "vpm.embeddings.position_embedding",
             "model.vision_model.embeddings.position_embedding", # SmolVLM
+            "vision_model.positional_embedding_vlm", # llama 4
         ),

         MODEL_TENSOR.V_ENC_ATTN_Q: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
             "vpm.encoder.layers.{bid}.self_attn.q_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.q_proj", # SmolVLM
+            "vision_model.model.layers.{bid}.self_attn.q_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.q_proj", # pixtral
             "visual.blocks.{bid}.attn.q", # qwen2vl, generated
         ),
@@ -946,6 +963,7 @@ class TensorNameMap:
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
             "vpm.encoder.layers.{bid}.self_attn.k_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.k_proj", # SmolVLM
+            "vision_model.model.layers.{bid}.self_attn.k_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.k_proj", # pixtral
             "visual.blocks.{bid}.attn.k", # qwen2vl, generated
         ),
@@ -958,6 +976,7 @@ class TensorNameMap:
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
             "vpm.encoder.layers.{bid}.self_attn.v_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.v_proj", # SmolVLM
+            "vision_model.model.layers.{bid}.self_attn.v_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.v_proj", # pixtral
             "visual.blocks.{bid}.attn.v", # qwen2vl, generated
         ),
@@ -968,23 +987,26 @@ class TensorNameMap:
             "vpm.encoder.layers.{bid}.layer_norm1",
             "model.vision_model.encoder.layers.{bid}.layer_norm1", # SmolVLM
             "vision_tower.transformer.layers.{bid}.attention_norm", # pixtral
+            "vision_model.model.layers.{bid}.input_layernorm", # llama4
             "visual.blocks.{bid}.norm1", # qwen2vl
         ),

-        MODEL_TENSOR.V_ENC_OUTPUT: (
+        MODEL_TENSOR.V_ENC_ATTN_O: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
             "vision_tower.vision_model.encoder.layers.{bid}.attn.proj", # InternVL
             "vpm.encoder.layers.{bid}.self_attn.out_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.out_proj", # SmolVLM
+            "vision_model.model.layers.{bid}.self_attn.o_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.o_proj", # pixtral
             "visual.blocks.{bid}.attn.proj", # qwen2vl
         ),

-        MODEL_TENSOR.V_ENC_OUTPUT_NORM: (
+        MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
             "vision_tower.vision_model.encoder.layers.{bid}.norm2", # InternVL
             "vpm.encoder.layers.{bid}.layer_norm2",
             "model.vision_model.encoder.layers.{bid}.layer_norm2", # SmolVLM
+            "vision_model.model.layers.{bid}.post_attention_layernorm", # llama4
             "vision_tower.transformer.layers.{bid}.ffn_norm", # pixtral
             "visual.blocks.{bid}.norm2", # qwen2vl
         ),
@@ -994,6 +1016,7 @@ class TensorNameMap:
             "vpm.encoder.layers.{bid}.mlp.fc1",
             "model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3
             "vision_tower.transformer.layers.{bid}.feed_forward.up_proj", # pixtral
+            "vision_model.model.layers.{bid}.mlp.fc1", # llama4
             "visual.blocks.{bid}.mlp.fc1", # qwen2vl
             "visual.blocks.{bid}.mlp.up_proj", # qwen2.5vl
         ),
@@ -1008,6 +1031,7 @@ class TensorNameMap:
             "vpm.encoder.layers.{bid}.mlp.fc2",
             "model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3
             "vision_tower.transformer.layers.{bid}.feed_forward.down_proj", # pixtral
+            "vision_model.model.layers.{bid}.mlp.fc2", # llama4
             "visual.blocks.{bid}.mlp.fc2", # qwen2vl
             "visual.blocks.{bid}.mlp.down_proj", # qwen2.5vl
         ),
@@ -1023,11 +1047,13 @@ class TensorNameMap:
         MODEL_TENSOR.V_PRE_NORM: (
             "vision_tower.vision_model.pre_layrnorm",
             "vision_tower.ln_pre", # pixtral
+            "vision_model.layernorm_pre", # llama4
         ),

         MODEL_TENSOR.V_POST_NORM: (
             "vision_tower.vision_model.post_layernorm",
             "model.vision_model.post_layernorm", # SmolVLM
+            "vision_model.layernorm_post", # llama4
             "visual.merger.ln_q", # qwen2vl
         ),

@@ -1094,6 +1120,77 @@ class TensorNameMap:
         MODEL_TENSOR.V_MM_PATCH_MERGER: (
             "multi_modal_projector.patch_merger.merging_layer", # mistral small 3.1
         ),
+
+        # audio (mtmd)
+
+        MODEL_TENSOR.A_ENC_EMBD_POS: (
+            "audio_tower.embed_positions", # ultravox
+        ),
+
+        MODEL_TENSOR.A_ENC_CONV1D: (
+            "audio_tower.conv{bid}", # ultravox
+        ),
+
+        MODEL_TENSOR.A_PRE_NORM: (),
+
+        MODEL_TENSOR.A_POST_NORM: (
+            "audio_tower.layer_norm", # ultravox
+            "audio_tower.ln_post", # qwen2omni
+        ),
+
+        MODEL_TENSOR.A_ENC_ATTN_Q: (
+            "audio_tower.layers.{bid}.self_attn.q_proj", # ultravox
+        ),
+
+        MODEL_TENSOR.A_ENC_ATTN_K: (
+            "audio_tower.layers.{bid}.self_attn.k_proj", # ultravox
+        ),
+
+        MODEL_TENSOR.A_ENC_ATTN_V: (
+            "audio_tower.layers.{bid}.self_attn.v_proj", # ultravox
+        ),
+
+        MODEL_TENSOR.A_ENC_INPUT_NORM: (
+            "audio_tower.layers.{bid}.self_attn_layer_norm", # ultravox
+        ),
+
+        MODEL_TENSOR.A_ENC_OUTPUT: (
+            "audio_tower.layers.{bid}.self_attn.out_proj", # ultravox
+        ),
+
+        MODEL_TENSOR.A_ENC_OUTPUT_NORM: (
+            "audio_tower.layers.{bid}.final_layer_norm", # ultravox
+        ),
+
+        MODEL_TENSOR.A_ENC_FFN_UP: (
+            "audio_tower.layers.{bid}.fc1", # ultravox
+        ),
+
+        MODEL_TENSOR.A_ENC_FFN_GATE: (),
+
+        MODEL_TENSOR.A_ENC_FFN_DOWN: (
+            "audio_tower.layers.{bid}.fc2", # ultravox
+        ),
+
+        # note: some tensors below has "audio." pseudo-prefix, to prevent conflicts with vision tensors
+        # this prefix is added in the conversion code in modify_tensors()
+
+        MODEL_TENSOR.A_MMPROJ: (
+            "audio.multi_modal_projector.linear_{bid}", # ultravox
+        ),
+
+        MODEL_TENSOR.A_MMPROJ_FC: (
+            "audio.multi_modal_projector.linear", # qwen2audio
+            "audio_tower.proj", # qwen2omni
+        ),
+
+        MODEL_TENSOR.A_MM_NORM_PRE: (
+            "audio.multi_modal_projector.ln_pre", # ultravox
+        ),
+
+        MODEL_TENSOR.A_MM_NORM_MID: (
+            "audio.multi_modal_projector.ln_mid", # ultravox
+        ),
     }

     # architecture-specific block mappings
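A note on the tensor_mapping.py hunks above: each tuple lists source-checkpoint name templates for one GGUF tensor, with {bid} expanded per block index during conversion, and the Llama 4 entries lose their "language_model." prefix, presumably because the converter now strips that wrapper from the text-model tensors before the lookup. A minimal, hypothetical sketch of how such a template table is resolved (names simplified; not the actual gguf-py API):

# Hypothetical sketch of resolving a name-template table like the one above.
# MAPPINGS and map_tensor_name are illustrative stand-ins, not gguf-py code.
MAPPINGS: dict[str, tuple[str, ...]] = {
    "blk.{bid}.attn_q": (
        "model.layers.{bid}.self_attn.q_proj",  # llama4 (new, unprefixed form)
        "layers.{bid}.attention.wq",            # llama-pth
    ),
}

def map_tensor_name(name: str, n_blocks: int) -> str | None:
    # Try every template for every block index; return the GGUF-side name.
    for gguf_tmpl, src_tmpls in MAPPINGS.items():
        for bid in range(n_blocks):
            for tmpl in src_tmpls:
                if tmpl.format(bid=bid) == name:
                    return gguf_tmpl.format(bid=bid)
    return None

assert map_tensor_name("model.layers.3.self_attn.q_proj", n_blocks=8) == "blk.3.attn_q"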
package/cpp/llama.cpp/gguf-py/gguf/utility.py

@@ -231,7 +231,7 @@ class SafetensorRemote:
         response.raise_for_status()

         # Get raw byte data
-        return response.content[:size]
+        return response.content[slice(size if size > -1 else None)]

     @classmethod
     def check_file_exist(cls, url: str) -> bool:
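The one-line utility.py change above fixes a negative-size edge case: when size is -1 (apparently used as a "return everything" sentinel), response.content[:size] silently drops the last byte, while slice(None) returns the full payload; non-negative sizes behave identically under both forms. A quick standalone illustration, assuming the -1 sentinel semantics:

# Demonstration of the edge case fixed above.
data = b"hello"

for size in (-1, 3):
    old = data[:size]                               # size=-1 -> b'hell' (last byte lost)
    new = data[slice(size if size > -1 else None)]  # size=-1 -> b'hello' (full content)
    print(size, old, new)                           # size=3 -> b'hel' in both cases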
package/cpp/llama.cpp/gguf-py/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gguf"
-version = "0.16.3"
+version = "0.17.0"
 description = "Read and write ML models in GGUF for GGML"
 authors = ["GGML <ggml@ggml.ai>"]
 packages = [