@novastera-oss/llamarn 0.2.9 → 0.3.1

Files changed (314)
  1. package/android/build.gradle +2 -1
  2. package/android/proguard-rules.pro +12 -0
  3. package/android/src/main/cpp/include/llama.h +15 -47
  4. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  13. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  14. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  15. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  20. package/cpp/build-info.cpp +2 -2
  21. package/cpp/llama.cpp/CMakeLists.txt +0 -1
  22. package/cpp/llama.cpp/CMakePresets.json +11 -0
  23. package/cpp/llama.cpp/CODEOWNERS +1 -0
  24. package/cpp/llama.cpp/README.md +8 -8
  25. package/cpp/llama.cpp/build-xcframework.sh +1 -1
  26. package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
  27. package/cpp/llama.cpp/common/arg.cpp +62 -1
  28. package/cpp/llama.cpp/common/chat.cpp +37 -20
  29. package/cpp/llama.cpp/common/chat.h +2 -0
  30. package/cpp/llama.cpp/common/common.cpp +22 -6
  31. package/cpp/llama.cpp/common/common.h +22 -4
  32. package/cpp/llama.cpp/convert_hf_to_gguf.py +1250 -43
  33. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +21 -13
  34. package/cpp/llama.cpp/ggml/CMakeLists.txt +13 -3
  35. package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +85 -47
  36. package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
  37. package/cpp/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
  38. package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
  39. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +1 -1
  40. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +0 -15
  41. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +7 -8
  42. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +44 -38
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
  44. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +126 -8
  45. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +130 -22
  46. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +138 -18
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +11 -3
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1206 -163
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +36 -9
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +3 -3
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +31 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +86 -17
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +225 -0
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +41 -301
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +85 -64
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +47 -60
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +29 -42
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +46 -59
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +36 -45
  72. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +38 -45
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +23 -36
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +3 -13
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +255 -99
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +1 -1
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +111 -3
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +6 -4
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1152 -695
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +92 -5
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +2 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +275 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh +7 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
  88. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
  89. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
  90. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +104 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +13 -0
  92. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
  93. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +27 -6
  94. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +2 -2
  95. package/cpp/llama.cpp/ggml/src/ggml-impl.h +80 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
  97. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +48 -12
  98. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +572 -106
  99. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +599 -105
  100. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +18 -4
  101. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +5 -0
  102. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +800 -42
  103. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
  106. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
  108. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
  109. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
  112. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
  114. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
  115. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
  116. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
  117. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
  118. package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
  119. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +4 -4
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +14 -26
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +191 -55
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +8 -9
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
  131. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +991 -307
  132. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +265 -0
  133. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +59 -12
  134. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
  135. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  136. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
  137. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
  138. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  139. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
  140. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
  141. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
  142. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
  143. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
  144. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +2 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +17 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +3 -8
  152. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
  153. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
  154. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +18 -3
  156. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
  158. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
  159. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
  160. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
  161. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
  163. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
  164. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
  166. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +84 -9
  167. package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +907 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +35 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +56 -0
  173. package/cpp/llama.cpp/ggml/src/ggml.c +386 -67
  174. package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
  175. package/cpp/llama.cpp/gguf-py/gguf/constants.py +307 -0
  176. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
  177. package/cpp/llama.cpp/gguf-py/gguf/metadata.py +4 -0
  178. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +24 -1
  179. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +122 -47
  180. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
  181. package/cpp/llama.cpp/include/llama.h +15 -47
  182. package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +34 -0
  183. package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +43 -0
  184. package/cpp/llama.cpp/requirements/requirements-all.txt +1 -0
  185. package/cpp/llama.cpp/requirements/requirements-server-bench.txt +5 -0
  186. package/cpp/llama.cpp/src/llama-arch.cpp +316 -3
  187. package/cpp/llama.cpp/src/llama-arch.h +23 -1
  188. package/cpp/llama.cpp/src/llama-batch.cpp +103 -71
  189. package/cpp/llama.cpp/src/llama-batch.h +31 -18
  190. package/cpp/llama.cpp/src/llama-chat.cpp +58 -1
  191. package/cpp/llama.cpp/src/llama-chat.h +3 -0
  192. package/cpp/llama.cpp/src/llama-context.cpp +180 -106
  193. package/cpp/llama.cpp/src/llama-context.h +26 -16
  194. package/cpp/llama.cpp/src/llama-cparams.h +3 -2
  195. package/cpp/llama.cpp/src/llama-graph.cpp +310 -211
  196. package/cpp/llama.cpp/src/llama-graph.h +184 -122
  197. package/cpp/llama.cpp/src/llama-hparams.cpp +47 -1
  198. package/cpp/llama.cpp/src/llama-hparams.h +13 -2
  199. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
  200. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
  201. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
  202. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +143 -47
  203. package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
  204. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
  205. package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
  206. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +36 -11
  207. package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
  208. package/cpp/llama.cpp/src/llama-memory.h +3 -0
  209. package/cpp/llama.cpp/src/llama-model.cpp +3545 -719
  210. package/cpp/llama.cpp/src/llama-model.h +21 -4
  211. package/cpp/llama.cpp/src/llama-quant.cpp +2 -2
  212. package/cpp/llama.cpp/src/llama-vocab.cpp +376 -10
  213. package/cpp/llama.cpp/src/llama-vocab.h +43 -0
  214. package/cpp/llama.cpp/src/unicode.cpp +207 -0
  215. package/cpp/llama.cpp/src/unicode.h +2 -0
  216. package/ios/include/chat.h +2 -0
  217. package/ios/include/common.h +22 -4
  218. package/ios/include/llama.h +15 -47
  219. package/ios/libs/llama.xcframework/Info.plist +13 -13
  220. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  221. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
  222. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  223. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
  224. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +15 -47
  225. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  226. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  227. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
  228. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
  229. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  230. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  231. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
  232. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  233. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  234. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
  235. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4016 -3766
  236. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
  237. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
  238. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +15 -47
  239. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
  240. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
  241. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +15 -47
  242. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  243. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
  244. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
  245. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +15 -47
  246. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  247. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  248. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  249. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
  250. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  251. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
  252. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +15 -47
  253. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  254. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  255. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
  256. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
  257. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  258. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  259. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
  260. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  261. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  262. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5303 -4926
  263. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  264. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
  265. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +15 -47
  266. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  267. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  268. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5274 -4897
  269. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4044 -3794
  270. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  271. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  272. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
  273. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  274. package/package.json +4 -4
  275. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
  276. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  277. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  278. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
  279. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
  280. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
  281. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
  282. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
  283. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
  284. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
  285. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
  286. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
  287. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
  288. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
  289. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
  290. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
  291. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
  292. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
  293. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
  294. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
  295. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
  296. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
  297. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
  298. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
  299. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
  300. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
  301. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
  302. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
  303. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
  304. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
  305. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
  306. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
  307. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
  308. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
  309. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
  310. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
  311. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
  312. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
  313. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
  314. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py

@@ -13,7 +13,7 @@ class TensorNameMap:
   "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
   "transformer.word_embeddings", # falcon
   "word_embeddings", # bloom
- "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
+ "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 plamo2 granite-hybrid
   "tok_embeddings", # llama-pth
   "embeddings.word_embeddings", # bert nomic-bert
   "language_model.embedding.word_embeddings", # persimmon
@@ -50,6 +50,7 @@ class TensorNameMap:
   "model.pre_ln", # rwkv7
   "model.layers.0.pre_norm", # rwkv7
   "backbone.norm", # wavtokenizer
+ "model.embedding_norm", # lfm2
   ),

   # Position embeddings
@@ -62,7 +63,7 @@ class TensorNameMap:
   # Output
   MODEL_TENSOR.OUTPUT: (
   "embed_out", # gptneox
- "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
+ "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe plamo2
   "output", # llama-pth bloom internlm2
   "word_embeddings_for_head", # persimmon
   "lm_head.linear", # phi2
@@ -76,7 +77,7 @@ class TensorNameMap:
   MODEL_TENSOR.OUTPUT_NORM: (
   "gpt_neox.final_layer_norm", # gptneox
   "transformer.ln_f", # gpt2 gpt-j falcon jais exaone
- "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe
+ "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe plamo2
   "norm", # llama-pth
   "transformer.norm_f", # mpt dbrx
   "ln_f", # refact bloom qwen gpt2
@@ -118,13 +119,14 @@ class TensorNameMap:
   "transformer.h.{bid}.input_layernorm", # falcon7b
   "h.{bid}.input_layernorm", # bloom
   "transformer.h.{bid}.ln_mlp", # falcon40b
- "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe
+ "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe granite-hybrid
   "layers.{bid}.attention_norm", # llama-pth
   "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
   "model.layers.{bid}.ln1", # yi
   "h.{bid}.ln_1", # gpt2
   "transformer.h.{bid}.ln", # phi2
   "model.layers.layers.{bid}.norm", # plamo
+ "model.layers.layers.{bid}.pre_mixer_norm", # plamo2
   "model.layers.{bid}.attention_norm", # internlm2
   "model.layers.{bid}.norm", # mamba-qbert
   "backbone.layers.{bid}.norm", # mamba
@@ -136,6 +138,7 @@ class TensorNameMap:
   "model.layers.{bid}.ln1", # rwkv7
   "model.layers.{bid}.input_layernorm", # llama4
   "transformer_encoder.{bid}.attention_norm", # neobert
+ "model.layers.{bid}.operator_norm", # lfm2
   ),

   # Attention norm 2
@@ -161,6 +164,7 @@ class TensorNameMap:
   "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
   "encoder.layers.{bid}.mixer.Wqkv", # jina
   "model.layers.{bid}.self_attn.qkv_proj", # phi3
+ "model.layers.layers.{bid}.mixer.qkv_proj", # plamo2
   "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
   "transformer.layers.{bid}.attn.qkv_proj", # openelm
   "transformer_encoder.{bid}.qkv", # neobert
@@ -220,6 +224,7 @@ class TensorNameMap:
   "transformer.h.{bid}.self_attention.dense", # falcon
   "h.{bid}.self_attention.dense", # bloom
   "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
+ "model.layers.{bid}.self_attn.out_proj", # lfm2
   "model.layers.{bid}.self_attn.linear_attn", # deci
   "layers.{bid}.attention.wo", # llama-pth
   "encoder.layer.{bid}.attention.output.dense", # bert
@@ -230,6 +235,7 @@ class TensorNameMap:
   "h.{bid}.attn.c_proj", # gpt2
   "transformer.h.{bid}.mixer.out_proj", # phi2
   "model.layers.layers.{bid}.self_attn.o_proj", # plamo
+ "model.layers.layers.{bid}.mixer.o_proj", # plamo2
   "model.layers.{bid}.attention.wo", # internlm2
   "encoder.layers.{bid}.attn.out_proj", # nomic-bert
   "encoder.layers.{bid}.mixer.out_proj", # jina
@@ -252,8 +258,9 @@ class TensorNameMap:
   ),

   MODEL_TENSOR.ATTN_POST_NORM: (
- "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
- "model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
+ "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
+ "model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
+ "model.layers.layers.{bid}.post_mixer_norm.weight", # plamo2
   ),

   # Rotary embeddings
@@ -279,19 +286,25 @@ class TensorNameMap:
   "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
   "encoder.layers.{bid}.post_attention_layernorm", # chatglm
   "transformer.layers.{bid}.ffn_norm", # openelm
+ "model.layers.{bid}.pre_ff_layernorm", # jamba granite-hybrid
+ "model.layers.{bid}.pre_moe_layernorm", # mini-jamba
   "model.layers.{bid}.post_attention_layernorm", # llama4
   "transformer_encoder.{bid}.ffn_norm", # neobert
+ "model.layers.layers.{bid}.pre_mlp_norm", # plamo2
   ),

   # Post feed-forward norm
   MODEL_TENSOR.FFN_PRE_NORM: (
   "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
+ "model.layers.{bid}.pre_ff_layernorm.weight",
   ),

   # Post feed-forward norm
   MODEL_TENSOR.FFN_POST_NORM: (
   "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
   "model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
+ "model.layers.layers.{bid}.post_mlp_norm.weight", # plamo2
+ "model.layers.{bid}.feed_forward.up_proj",
   ),

   MODEL_TENSOR.FFN_GATE_INP: (
@@ -301,8 +314,9 @@ class TensorNameMap:
   "transformer.decoder_layer.{bid}.router", # Grok
   "transformer.blocks.{bid}.ffn.router.layer", # dbrx
   "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
- "model.layers.{bid}.feed_forward.router", # llama4
+ "model.layers.{bid}.feed_forward.router", # llama4 jamba
   "encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe
+ "model.layers.{bid}.mlp.gate.wg", # hunyuan
   ),

   MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
@@ -310,7 +324,8 @@ class TensorNameMap:
   ),

   MODEL_TENSOR.FFN_EXP_PROBS_B: (
- "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1
+ "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1
+ "model.layers.{bid}.mlp.moe_statics.e_score_correction", # ernie4.5-moe
   ),

   # Feed-forward up
@@ -334,6 +349,7 @@ class TensorNameMap:
   "model.layers.{bid}.mlp.fc1", # phi2
   "model.layers.{bid}.mlp.gate_up_proj", # phi3 glm-4-0414
   "model.layers.layers.{bid}.mlp.up_proj", # plamo
+ "model.layers.layers.{bid}.mlp.gate_up_proj", # plamo2
   "model.layers.{bid}.feed_forward.w3", # internlm2
   "encoder.layers.{bid}.mlp.fc11", # nomic-bert
   "encoder.layers.{bid}.mlp.fc1", # nomic-bert-moe
@@ -344,24 +360,26 @@ class TensorNameMap:
   "model.layers.{bid}.residual_mlp.w3", # arctic
   "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
   "transformer.h.{bid}.mlp.c_fc_1", # exaone
- "model.layers.{bid}.feed_forward.up_proj", # llama4
+ "model.layers.{bid}.feed_forward.up_proj", # llama4 jamba granite-hybrid
   "transformer_encoder.{bid}.ffn.w12", # neobert
   ),

   MODEL_TENSOR.FFN_UP_EXP: (
- "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
- "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
- "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
- "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
- "model.layers.{bid}.feed_forward.experts.up_proj", # llama4
- "encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe
+ "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
+ "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
+ "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged) ernie4.5-moe
+ "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
+ "model.layers.{bid}.feed_forward.experts.up_proj", # llama4
+ "encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe
   ),

   MODEL_TENSOR.FFN_UP_SHEXP: (
   "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
   "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
   "model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
+ "model.layers.{bid}.feed_forward.down_proj",
+ "model.layers.{bid}.mlp.shared_mlp.up_proj", # hunyuan
   ),

   # AWQ-activation gate
@@ -382,22 +400,23 @@ class TensorNameMap:
   "transformer.h.{bid}.mlp.linear_1", # refact
   "model.layers.{bid}.residual_mlp.w1", # arctic
   "transformer.h.{bid}.mlp.c_fc_0", # exaone
- "model.layers.{bid}.feed_forward.gate_proj", # llama4
+ "model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba granite-hybrid
   ),

   MODEL_TENSOR.FFN_GATE_EXP: (
- "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
- "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
- "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
- "model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
- "model.layers.{bid}.feed_forward.experts.gate_proj", # llama4
+ "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
+ "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
+ "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged) ernie4.5-moe
+ "model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
+ "model.layers.{bid}.feed_forward.experts.gate_proj", # llama4
   ),

   MODEL_TENSOR.FFN_GATE_SHEXP: (
   "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
   "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
   "model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
+ "model.layers.{bid}.mlp.shared_mlp.gate_proj", # hunyuan
   ),

   # Feed-forward down
@@ -427,19 +446,19 @@ class TensorNameMap:
   "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
   "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
   "model.layers.h.{bid}.mlp.c_proj", # exaone
- "model.layers.{bid}.feed_forward.down_proj", # llama4
+ "model.layers.{bid}.feed_forward.down_proj", # llama4 jamba granite-hybrid
   "transformer_encoder.{bid}.ffn.w3", # neobert
   ),

   MODEL_TENSOR.FFN_DOWN_EXP: (
- "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
- "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
- "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
- "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
- "model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
- "model.layers.{bid}.feed_forward.experts.down_proj", # llama4
- "encoder.layers.{bid}.mlp.experts.mlp.w2", # nomic-bert-moe
+ "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
+ "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
+ "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged) ernie4.5-moe
+ "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
+ "model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
+ "model.layers.{bid}.feed_forward.experts.down_proj", # llama4
+ "encoder.layers.{bid}.mlp.experts.mlp.w2", # nomic-bert-moe
   ),

   MODEL_TENSOR.FFN_DOWN_SHEXP: (
@@ -447,24 +466,29 @@ class TensorNameMap:
   "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
   "model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
   "model.layers.{bid}.shared_mlp.output_linear", # granitemoe
+ "model.layers.{bid}.mlp.shared_mlp.down_proj", # hunyuan
   ),

   MODEL_TENSOR.ATTN_Q_NORM: (
   "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
   "model.layers.{bid}.self_attn.q_layernorm", # persimmon
+ "model.layers.{bid}.self_attn.query_layernorm", # hunyuan
   "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
   "transformer.blocks.{bid}.attn.q_ln", # sea-lion
   "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
   "transformer.layers.{bid}.attn.q_norm", # openelm
+ "model.layers.layers.{bid}.mixer.q", # plamo2
   ),

   MODEL_TENSOR.ATTN_K_NORM: (
   "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
   "model.layers.{bid}.self_attn.k_layernorm", # persimmon
+ "model.layers.{bid}.self_attn.key_layernorm", # hunyuan
   "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
   "transformer.blocks.{bid}.attn.k_ln", # sea-lion
   "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
   "transformer.layers.{bid}.attn.k_norm", # openelm
+ "model.layers.layers.{bid}.mixer.k", # plamo2
   ),

   MODEL_TENSOR.ROPE_FREQS: (
@@ -477,7 +501,7 @@ class TensorNameMap:
   "encoder.layers.{bid}.norm2", # nomic-bert
   "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
   "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
- "encoder.layer.{bid}.layer_norm_2" # jina-v2-code
+ "encoder.layer.{bid}.layer_norm_2", # jina-v2-code
   ),

   MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: (
@@ -545,38 +569,77 @@ class TensorNameMap:
   ),

   MODEL_TENSOR.SSM_IN: (
- "model.layers.{bid}.in_proj",
- "backbone.layers.{bid}.mixer.in_proj",
+ "model.layers.{bid}.in_proj", # mamba-hf
+ "backbone.layers.{bid}.mixer.in_proj", # mamba
+ "model.layers.{bid}.mamba.in_proj", # jamba falcon-h1 granite-hybrid
+ "model.layers.layers.{bid}.mixer.in_proj", # plamo2
   ),

   MODEL_TENSOR.SSM_CONV1D: (
- "model.layers.{bid}.conv1d",
- "backbone.layers.{bid}.mixer.conv1d",
+ "model.layers.{bid}.conv1d", # mamba-hf
+ "backbone.layers.{bid}.mixer.conv1d", # mamba
+ "model.layers.{bid}.mamba.conv1d", # jamba falcon-h1 granite-hybrid
+ "model.layers.layers.{bid}.mixer.conv1d", # plamo2
   ),

   MODEL_TENSOR.SSM_X: (
- "model.layers.{bid}.x_proj",
- "backbone.layers.{bid}.mixer.x_proj",
+ "model.layers.{bid}.x_proj", # mamba-hf
+ "backbone.layers.{bid}.mixer.x_proj", # mamba
+ "model.layers.{bid}.mamba.x_proj", # jamba
+ "model.layers.layers.{bid}.mixer.bcdt_proj", # plamo2
   ),

   MODEL_TENSOR.SSM_DT: (
- "model.layers.{bid}.dt_proj",
- "backbone.layers.{bid}.mixer.dt_proj",
+ "model.layers.{bid}.dt_proj", # mamba-hf
+ "backbone.layers.{bid}.mixer.dt_proj", # mamba
+ "model.layers.{bid}.mamba.dt_proj", # jamba falcon-h1 granite-hybrid
+ "model.layers.layers.{bid}.mixer.dt_proj", # plamo2
+ ),
+
+ MODEL_TENSOR.SSM_DT_NORM: (
+ "model.layers.{bid}.mamba.dt_layernorm", # jamba
   ),

   MODEL_TENSOR.SSM_A: (
- "model.layers.{bid}.A_log",
- "backbone.layers.{bid}.mixer.A_log",
+ "model.layers.{bid}.A_log", # mamba-hf
+ "backbone.layers.{bid}.mixer.A_log", # mamba
+ "model.layers.{bid}.mamba.A_log", # jamba falcon-h1 granite-hybrid
+ "model.layers.layers.{bid}.mixer.A_log", # plamo2
+ ),
+
+ MODEL_TENSOR.SSM_B_NORM: (
+ "model.layers.{bid}.mamba.b_layernorm", # jamba
+ "model.layers.{bid}.mamba.B_layernorm", # mini-jamba
+ "model.layers.layers.{bid}.mixer.B_norm.weight", # plamo2
+ ),
+
+ MODEL_TENSOR.SSM_C_NORM: (
+ "model.layers.{bid}.mamba.c_layernorm", # jamba
+ "model.layers.{bid}.mamba.C_layernorm", # mini-jamba
+ "model.layers.layers.{bid}.mixer.C_norm.weight", # plamo2
   ),

   MODEL_TENSOR.SSM_D: (
- "model.layers.{bid}.D",
- "backbone.layers.{bid}.mixer.D",
+ "model.layers.{bid}.D", # mamba-hf
+ "backbone.layers.{bid}.mixer.D", # mamba
+ "model.layers.{bid}.mamba.D", # jamba falcon-h1 granite-hybrid
+ "model.layers.layers.{bid}.mixer.D", # plamo2
+ ),
+
+ MODEL_TENSOR.SSM_DT_NORM: (
+ "model.layers.layers.{bid}.mixer.dt_norm.weight", # plamo2
+ ),
+
+ MODEL_TENSOR.SSM_NORM: (
+ "model.layers.{bid}.mamba.norm", # falcon-h1 granite-hybrid
+ "backbone.layers.{bid}.mixer.norm", # mamba2
   ),

   MODEL_TENSOR.SSM_OUT: (
- "model.layers.{bid}.out_proj",
- "backbone.layers.{bid}.mixer.out_proj",
+ "model.layers.{bid}.out_proj", # mamba-hf
+ "backbone.layers.{bid}.mixer.out_proj", # mamba
+ "model.layers.{bid}.mamba.out_proj", # jamba falcon-h1 granite-hybrid
+ "model.layers.layers.{bid}.mixer.out_proj", # plamo2
   ),

   MODEL_TENSOR.TIME_MIX_W0: (
@@ -978,6 +1041,18 @@ class TensorNameMap:
   "backbone.posnet.{bid}.proj_out", # wavtokenizer
   ),

+ MODEL_TENSOR.SHORTCONV_CONV: (
+ "model.layers.{bid}.conv.conv",
+ ),
+
+ MODEL_TENSOR.SHORTCONV_INPROJ: (
+ "model.layers.{bid}.conv.in_proj",
+ ),
+
+ MODEL_TENSOR.SHORTCONV_OUTPROJ: (
+ "model.layers.{bid}.conv.out_proj",
+ ),
+
   #############################################################################
   ## Vision encoder
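Most of the gguf-py churn above is in TensorNameMap: each MODEL_TENSOR entry lists the source-checkpoint tensor names that convert_hf_to_gguf.py may encounter for one GGUF tensor, and "{bid}" is expanded per block index when the map is built. A minimal standalone sketch of that expansion and lookup (illustrative only, not the actual gguf-py code; SSM_IN_CANDIDATES and build_map are invented names for the example):

```python
# Sketch: expand "{bid}" per block and invert candidate names -> (tensor type, block).
# The candidate list is copied from the SSM_IN entry in the diff above.
SSM_IN_CANDIDATES = (
    "model.layers.{bid}.in_proj",               # mamba-hf
    "backbone.layers.{bid}.mixer.in_proj",      # mamba
    "model.layers.{bid}.mamba.in_proj",         # jamba falcon-h1 granite-hybrid
    "model.layers.layers.{bid}.mixer.in_proj",  # plamo2
)

def build_map(n_blocks: int) -> dict:
    mapping = {}
    for bid in range(n_blocks):
        for cand in SSM_IN_CANDIDATES:
            mapping[cand.format(bid=bid)] = ("ssm_in", bid)
    return mapping

tensor_map = build_map(n_blocks=2)
print(tensor_map["model.layers.layers.1.mixer.in_proj"])  # ('ssm_in', 1)
```

This is why wiring up a new architecture such as PLaMo-2, LFM2, or granite-hybrid is largely a matter of appending its checkpoint names to the right tuples, which is what the hunks above do.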
package/cpp/llama.cpp/gguf-py/gguf/vocab.py

@@ -245,9 +245,18 @@ class SpecialVocab:
  if not tokenizer_config:
      return True
  chat_template_alt = None
- chat_template_file = path / 'chat_template.json'
- if chat_template_file.is_file():
-     with open(chat_template_file, encoding = 'utf-8') as f:
+ chat_template_json = path / 'chat_template.json'
+ chat_template_jinja = path / 'chat_template.jinja'
+ if chat_template_jinja.is_file():
+     with open(chat_template_jinja, encoding = 'utf-8') as f:
+         chat_template_alt = f.read()
+     if additional_templates := list((path / 'additional_chat_templates').glob('*.jinja')):
+         chat_template_alt = [{'name': 'default', 'template': chat_template_alt}]
+         for template_path in additional_templates:
+             with open(template_path, encoding = 'utf-8') as fp:
+                 chat_template_alt.append({'name': template_path.stem, 'template': fp.read()})
+ elif chat_template_json.is_file():
+     with open(chat_template_json, encoding = 'utf-8') as f:
          chat_template_alt = json.load(f).get('chat_template')
  chat_template = tokenizer_config.get('chat_template', chat_template_alt)
  if chat_template is None or isinstance(chat_template, (str, list)):
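The rewritten block teaches SpecialVocab to prefer the transformers-style chat_template.jinja file, attach any additional_chat_templates/*.jinja as named templates, and only then fall back to the legacy chat_template.json. The same discovery order as a self-contained sketch (a standalone helper written for this note, not the actual SpecialVocab method):

```python
import json
from pathlib import Path

def discover_chat_template(path: Path):
    """Return a template string, a list of {'name', 'template'} dicts, or None."""
    jinja_file = path / "chat_template.jinja"
    json_file = path / "chat_template.json"
    if jinja_file.is_file():
        template = jinja_file.read_text(encoding="utf-8")
        extra = sorted((path / "additional_chat_templates").glob("*.jinja"))
        if extra:
            # several templates: keep the main one under the name 'default'
            named = [{"name": "default", "template": template}]
            named += [{"name": p.stem, "template": p.read_text(encoding="utf-8")}
                      for p in extra]
            return named
        return template
    if json_file.is_file():
        # legacy single-template JSON wrapper
        return json.loads(json_file.read_text(encoding="utf-8")).get("chat_template")
    return None
```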
package/cpp/llama.cpp/include/llama.h (the same header is shipped under package/android/src/main/cpp/include, package/ios/include, and the xcframework headers)

@@ -71,52 +71,13 @@ extern "C" {
   typedef int32_t llama_seq_id;

   enum llama_vocab_type {
- LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab
- LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
- LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE
- LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece
- LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram
- LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization
- };
-
- // pre-tokenization types
- enum llama_vocab_pre_type {
- LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
- LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
- LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
- LLAMA_VOCAB_PRE_TYPE_MPT = 5,
- LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
- LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
- LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
- LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
- LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10,
- LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
- LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
- LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
- LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
- LLAMA_VOCAB_PRE_TYPE_PORO = 15,
- LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16,
- LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
- LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
- LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
- LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
- LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
- LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
- LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
- LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
- LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
- LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
- LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
- LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
- LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
- LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
- LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
- LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
- LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
- LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
+ LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab
+ LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
+ LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE
+ LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece
+ LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram
+ LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization
+ LLAMA_VOCAB_TYPE_PLAMO2 = 6, // PLaMo-2 tokenizer based on Aho-Corasick with dynamic programming
   };

   enum llama_rope_type {
@@ -374,6 +335,9 @@ extern "C" {
   bool swa_full; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
   // NOTE: setting to false when n_seq_max > 1 can cause bad performance in some cases
   // ref: https://github.com/ggml-org/llama.cpp/pull/13845#issuecomment-2924800573
+ bool kv_unified; // use a unified buffer across the input sequences when computing the attention
+ // try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix
+ // ref: https://github.com/ggml-org/llama.cpp/pull/14363
   };

   // model quantization parameters
@@ -764,7 +728,7 @@ extern "C" {
   // - lazily on next llama_decode()
   // p0 < 0 : [0, p1]
   // p1 < 0 : [p0, inf)
- DEPRECATED(void llama_kv_self_seq_div(
+ DEPRECATED(LLAMA_API void llama_kv_self_seq_div(
   struct llama_context * ctx,
   llama_seq_id seq_id,
   llama_pos p0,
@@ -992,6 +956,7 @@ extern "C" {
   // in the order they have appeared in the batch.
   // Rows: number of tokens for which llama_batch.logits[i] != 0
   // Cols: n_vocab
+ // TODO: deprecate in favor of llama_get_logits_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
   LLAMA_API float * llama_get_logits(struct llama_context * ctx);

   // Logits for the ith token. For positive indices, Equivalent to:
@@ -1006,6 +971,7 @@ extern "C" {
   // in the order they have appeared in the batch.
   // shape: [n_outputs*n_embd]
   // Otherwise, returns NULL.
+ // TODO: deprecate in favor of llama_get_embeddings_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
   LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);

   // Get the embeddings for the ith token. For positive indices, Equivalent to:
@@ -1044,6 +1010,7 @@ extern "C" {
   LLAMA_API llama_token llama_vocab_sep(const struct llama_vocab * vocab); // sentence separator
   LLAMA_API llama_token llama_vocab_nl (const struct llama_vocab * vocab); // next-line
   LLAMA_API llama_token llama_vocab_pad(const struct llama_vocab * vocab); // padding
+ LLAMA_API llama_token llama_vocab_mask(const struct llama_vocab * vocab); // mask

   LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
   LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
@@ -1429,6 +1396,7 @@ extern "C" {

   int32_t n_p_eval;
   int32_t n_eval;
+ int32_t n_reused; // number of times a ggml compute graph had been reused
   };

   struct llama_perf_sampler_data {
package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja (new file)

@@ -0,0 +1,34 @@
+ {%- if not add_generation_prompt is defined -%}
+ {%- set add_generation_prompt = true -%}
+ {%- endif -%}
+ {%- set ns = namespace(system_prompt='') -%}
+ {%- for message in messages -%}
+ {%- if message['role'] == 'system' -%}
+ {%- set ns.system_prompt = message['content'] -%}
+ {%- endif -%}
+ {%- endfor -%}
+ {{bos_token}}
+ {%- if ns.system_prompt != '' -%}
+ {{- 'System: ' + ns.system_prompt + '\n\n' -}}
+ {%- endif -%}
+ {%- for message in messages -%}
+ {%- if message['role'] == 'user' -%}
+ {{- 'User: ' + message['content']|trim + '\n\n' -}}
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+ {%- set content = message['content'] -%}
+ {%- if '</think>' in content -%}
+ {%- set content = content.split('</think>')[-1] -%}
+ {%- endif -%}
+ {{- 'Assistant: ' + content|trim + '\n\n' -}}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- if add_generation_prompt -%}
+ {{- 'Assistant:' -}}
+ {%- if enable_thinking is defined and enable_thinking is false %}
+ {{- ' <think>\n</think>' }}
+ {%- endif %}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- ' <think>' }}
+ {%- endif %}
+ {%- endif -%}
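To preview what this new RWKV world template produces, it can be rendered with the jinja2 package (llama.cpp itself evaluates chat templates with its bundled minja engine, so treat this only as an approximation; the template path assumes the script runs from the unpacked package root):

```python
from jinja2 import Template

with open("cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja",
          encoding="utf-8") as f:
    template = Template(f.read())

# add_generation_prompt defaults to true inside the template itself
print(template.render(
    bos_token="",
    messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "Hello"},
    ],
))
# System: You are a helpful assistant
#
# User: Hello
#
# Assistant:
```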
package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja (new file)

@@ -0,0 +1,43 @@
+ {%- if tools -%}
+ <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
+ {%- endif -%}
+ {%- for message in messages -%}
+ {%- if loop.first and messages[0]['role'] != 'system' -%}
+ <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
+ {%- endif -%}
+ {%- if message['role'] == 'system' -%}
+ <|im_system|>system<|im_middle|>
+ {%- elif message['role'] == 'user' -%}
+ <|im_user|>user<|im_middle|>
+ {%- elif message['role'] == 'assistant' -%}
+ <|im_assistant|>assistant<|im_middle|>
+ {%- elif message['role'] == 'tool' -%}
+ <|im_system|>tool<|im_middle|>
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
+ {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
+ <|tool_calls_section_begin|>
+ {%- for tool_call in message['tool_calls'] -%}
+ {%- set func_name = tool_call['function']['name'] -%}
+ {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
+ <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
+ {%- endfor -%}
+ <|tool_calls_section_end|>
+ {%- elif message['role'] == 'tool' -%}
+ ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
+ {%- elif message['content'] is string -%}
+ {{ message['content'] }}
+ {%- elif message['content'] is not none -%}
+ {% for content in message['content'] -%}
+ {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+ <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+ {% else -%}
+ {{ content['text'] }}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- endif -%}
+ <|im_end|>
+ {%- endfor -%}
+ {%- if add_generation_prompt -%}
+ <|im_assistant|>assistant<|im_middle|>
+ {%- endif -%}
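The Kimi-K2 template wires tool calling into the <|im_...|> wire format, serializing each call id as functions.<name>:<index>. Rendering an assistant turn that carries a tool call, again with jinja2 as a rough preview (the messages are made up for the example, and the path assumes the unpacked package root):

```python
from jinja2 import Template

with open("cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja",
          encoding="utf-8") as f:
    template = Template(f.read())

print(template.render(
    messages=[
        {"role": "user", "content": "Weather in Paris?"},
        {"role": "assistant", "content": "",
         "tool_calls": [{"function": {"name": "get_weather",
                                      "arguments": {"city": "Paris"}}}]},
    ],
    add_generation_prompt=False,
))
# Output (a single line; wrapped here for readability):
# <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
# <|im_user|>user<|im_middle|>Weather in Paris?<|im_end|>
# <|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|>
# <|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{"city": "Paris"}<|tool_call_end|><|tool_calls_section_end|><|im_end|>
```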
package/cpp/llama.cpp/requirements/requirements-all.txt

@@ -3,6 +3,7 @@
  -r ../tools/server/tests/requirements.txt

  -r ./requirements-compare-llama-bench.txt
+ -r ./requirements-server-bench.txt
  -r ./requirements-pydantic.txt
  -r ./requirements-test-tokenizer-random.txt

package/cpp/llama.cpp/requirements/requirements-server-bench.txt (new file)

@@ -0,0 +1,5 @@
+ datasets~=3.2.0
+ matplotlib~=3.10.0
+ numpy~=1.26.4
+ requests~=2.32.3
+ tqdm~=4.67.1