@novastera-oss/llamarn 0.2.9 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. package/android/build.gradle +2 -1
  2. package/android/proguard-rules.pro +12 -0
  3. package/android/src/main/cpp/include/llama.h +15 -47
  4. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  13. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  14. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  15. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  20. package/cpp/build-info.cpp +2 -2
  21. package/cpp/llama.cpp/CMakeLists.txt +0 -1
  22. package/cpp/llama.cpp/CMakePresets.json +11 -0
  23. package/cpp/llama.cpp/CODEOWNERS +1 -0
  24. package/cpp/llama.cpp/README.md +8 -8
  25. package/cpp/llama.cpp/build-xcframework.sh +1 -1
  26. package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
  27. package/cpp/llama.cpp/common/arg.cpp +62 -1
  28. package/cpp/llama.cpp/common/chat.cpp +37 -20
  29. package/cpp/llama.cpp/common/chat.h +2 -0
  30. package/cpp/llama.cpp/common/common.cpp +22 -6
  31. package/cpp/llama.cpp/common/common.h +22 -4
  32. package/cpp/llama.cpp/convert_hf_to_gguf.py +1250 -43
  33. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +21 -13
  34. package/cpp/llama.cpp/ggml/CMakeLists.txt +13 -3
  35. package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +85 -47
  36. package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
  37. package/cpp/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
  38. package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
  39. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +1 -1
  40. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +0 -15
  41. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +7 -8
  42. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +44 -38
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
  44. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +126 -8
  45. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +130 -22
  46. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +138 -18
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +11 -3
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1206 -163
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +36 -9
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +3 -3
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +31 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +86 -17
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +225 -0
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +41 -301
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +85 -64
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +47 -60
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +29 -42
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +46 -59
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +36 -45
  72. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +38 -45
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +23 -36
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +3 -13
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +255 -99
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +1 -1
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +111 -3
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +6 -4
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1152 -695
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +92 -5
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +2 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +275 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh +7 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
  88. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
  89. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
  90. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +104 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +13 -0
  92. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
  93. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +27 -6
  94. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +2 -2
  95. package/cpp/llama.cpp/ggml/src/ggml-impl.h +80 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
  97. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +48 -12
  98. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +572 -106
  99. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +599 -105
  100. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +18 -4
  101. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +5 -0
  102. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +800 -42
  103. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
  106. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
  108. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
  109. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
  112. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
  114. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
  115. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
  116. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
  117. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
  118. package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
  119. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +4 -4
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +14 -26
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +191 -55
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +8 -9
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
  131. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +991 -307
  132. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +265 -0
  133. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +59 -12
  134. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
  135. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  136. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
  137. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
  138. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  139. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
  140. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
  141. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
  142. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
  143. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
  144. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +2 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +17 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +3 -8
  152. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
  153. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
  154. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +18 -3
  156. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
  158. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
  159. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
  160. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
  161. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
  163. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
  164. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
  166. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +84 -9
  167. package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +907 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +35 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +56 -0
  173. package/cpp/llama.cpp/ggml/src/ggml.c +386 -67
  174. package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
  175. package/cpp/llama.cpp/gguf-py/gguf/constants.py +307 -0
  176. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
  177. package/cpp/llama.cpp/gguf-py/gguf/metadata.py +4 -0
  178. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +24 -1
  179. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +122 -47
  180. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
  181. package/cpp/llama.cpp/include/llama.h +15 -47
  182. package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +34 -0
  183. package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +43 -0
  184. package/cpp/llama.cpp/requirements/requirements-all.txt +1 -0
  185. package/cpp/llama.cpp/requirements/requirements-server-bench.txt +5 -0
  186. package/cpp/llama.cpp/src/llama-arch.cpp +316 -3
  187. package/cpp/llama.cpp/src/llama-arch.h +23 -1
  188. package/cpp/llama.cpp/src/llama-batch.cpp +103 -71
  189. package/cpp/llama.cpp/src/llama-batch.h +31 -18
  190. package/cpp/llama.cpp/src/llama-chat.cpp +58 -1
  191. package/cpp/llama.cpp/src/llama-chat.h +3 -0
  192. package/cpp/llama.cpp/src/llama-context.cpp +180 -106
  193. package/cpp/llama.cpp/src/llama-context.h +26 -16
  194. package/cpp/llama.cpp/src/llama-cparams.h +3 -2
  195. package/cpp/llama.cpp/src/llama-graph.cpp +310 -211
  196. package/cpp/llama.cpp/src/llama-graph.h +184 -122
  197. package/cpp/llama.cpp/src/llama-hparams.cpp +47 -1
  198. package/cpp/llama.cpp/src/llama-hparams.h +13 -2
  199. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
  200. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
  201. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
  202. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +143 -47
  203. package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
  204. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
  205. package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
  206. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +36 -11
  207. package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
  208. package/cpp/llama.cpp/src/llama-memory.h +3 -0
  209. package/cpp/llama.cpp/src/llama-model.cpp +3545 -719
  210. package/cpp/llama.cpp/src/llama-model.h +21 -4
  211. package/cpp/llama.cpp/src/llama-quant.cpp +2 -2
  212. package/cpp/llama.cpp/src/llama-vocab.cpp +376 -10
  213. package/cpp/llama.cpp/src/llama-vocab.h +43 -0
  214. package/cpp/llama.cpp/src/unicode.cpp +207 -0
  215. package/cpp/llama.cpp/src/unicode.h +2 -0
  216. package/ios/include/chat.h +2 -0
  217. package/ios/include/common.h +22 -4
  218. package/ios/include/llama.h +15 -47
  219. package/ios/libs/llama.xcframework/Info.plist +13 -13
  220. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  221. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
  222. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  223. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
  224. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +15 -47
  225. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  226. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  227. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
  228. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
  229. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  230. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  231. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
  232. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  233. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  234. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
  235. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4016 -3766
  236. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
  237. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
  238. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +15 -47
  239. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
  240. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
  241. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +15 -47
  242. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  243. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
  244. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
  245. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +15 -47
  246. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  247. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  248. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  249. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
  250. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  251. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
  252. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +15 -47
  253. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  254. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  255. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
  256. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
  257. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  258. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  259. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
  260. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  261. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  262. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5303 -4926
  263. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  264. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
  265. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +15 -47
  266. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  267. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  268. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5274 -4897
  269. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4044 -3794
  270. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  271. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
  272. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
  273. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  274. package/package.json +4 -4
  275. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
  276. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  277. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  278. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
  279. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
  280. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
  281. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
  282. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
  283. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
  284. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
  285. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
  286. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
  287. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
  288. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
  289. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
  290. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
  291. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
  292. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
  293. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
  294. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
  295. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
  296. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
  297. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
  298. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
  299. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
  300. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
  301. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
  302. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
  303. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
  304. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
  305. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
  306. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
  307. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
  308. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
  309. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
  310. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
  311. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
  312. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
  313. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
  314. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
@@ -7,7 +7,6 @@ import pathlib
7
7
  import re
8
8
 
9
9
  import requests
10
- import sys
11
10
  import json
12
11
  import shutil
13
12
  import argparse
@@ -69,8 +68,7 @@ args = parser.parse_args()
69
68
  hf_token = args.hf_token if args.hf_token is not None else hf_token
70
69
 
71
70
  if hf_token is None:
72
- logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token")
73
- sys.exit(1)
71
+ logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")
74
72
 
75
73
  # TODO: this string has to exercise as much pre-tokenizer functionality as possible
76
74
  # will be updated with time - contributions welcome
@@ -128,6 +126,10 @@ models = [
128
126
  {"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
129
127
  {"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
130
128
  {"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
129
+ {"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
130
+ {"name": "midm-2.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct", },
131
+ {"name": "lfm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LiquidAI/LFM2-Tokenizer"},
132
+ {"name": "exaone4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B", },
131
133
  ]
132
134
 
133
135
  # some models are known to be broken upstream, so we will skip them as exceptions
@@ -137,11 +139,18 @@ pre_computed_hashes = [
137
139
  {"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
138
140
  {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
139
141
  {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
142
+ {"name": "hunyuan", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-A13B-Instruct", "chkhsh": "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664"},
143
+ # falcon-h1 series uses 4 different tokenizers across model sizes (0.5b - 34b), hence we need to define 4 different hashes
144
+ {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base", "chkhsh": "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"},
145
+ {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-1B-Base", "chkhsh": "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86"},
146
+ {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-7B-Base", "chkhsh": "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896"},
147
+ {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"},
148
+ {"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
140
149
  ]
141
150
 
142
151
 
143
152
  def download_file_with_auth(url, token, save_path):
144
- headers = {"Authorization": f"Bearer {token}"}
153
+ headers = {"Authorization": f"Bearer {token}"} if token else None
145
154
  response = sess.get(url, headers=headers)
146
155
  response.raise_for_status()
147
156
  os.makedirs(os.path.dirname(save_path), exist_ok=True)
@@ -222,7 +231,7 @@ for model in models:
222
231
  # generate the source code for the convert_hf_to_gguf.py:get_vocab_base_pre() function:
223
232
 
224
233
  src_ifs = ""
225
- for model in [*all_models, *pre_computed_hashes]:
234
+ for model in [*pre_computed_hashes, *all_models]:
226
235
  name = model["name"]
227
236
  tokt = model["tokt"]
228
237
  chkhsh = model.get("chkhsh")
@@ -230,11 +239,6 @@ for model in [*all_models, *pre_computed_hashes]:
230
239
  if tokt == TOKENIZER_TYPE.SPM or tokt == TOKENIZER_TYPE.UGM:
231
240
  continue
232
241
 
233
- # Skip if the tokenizer folder does not exist or there are other download issues previously
234
- if not os.path.exists(f"models/tokenizers/{name}"):
235
- logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
236
- continue
237
-
238
242
  # create the tokenizer
239
243
  if chkhsh is not None:
240
244
  # if the model has a pre-computed hash, use it
@@ -244,15 +248,19 @@ for model in [*all_models, *pre_computed_hashes]:
244
248
  chkhsh = existing_models[name]
245
249
  else:
246
250
  # otherwise, compute the hash of the tokenizer
251
+
252
+ # Fail if the tokenizer folder with config does not exist or there are other download issues previously
253
+ if not os.path.isfile(f"models/tokenizers/{name}/tokenizer_config.json"):
254
+ raise OSError(f"Config for tokenizer {name} not found. The model may not exist or is not accessible with the provided token.")
255
+
247
256
  try:
248
257
  logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
249
258
  if name == "t5":
250
259
  tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
251
260
  else:
252
261
  tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
253
- except OSError as e:
254
- logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
255
- continue # Skip to the next model if the tokenizer can't be loaded
262
+ except Exception as e:
263
+ raise OSError(f"Error loading tokenizer for model {name}.") from e
256
264
 
257
265
  chktok = tokenizer.encode(CHK_TXT)
258
266
  chkhsh = sha256(str(chktok).encode()).hexdigest()
@@ -131,7 +131,7 @@ option(GGML_RVV "ggml: enable rvv" ON)
131
131
  option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
132
132
  option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
133
133
  option(GGML_VXE "ggml: enable vxe" ON)
134
- option(GGML_NNPA "ggml: enable nnpa" ON)
134
+ option(GGML_NNPA "ggml: enable nnpa" OFF) # temp disabled by default, see: https://github.com/ggml-org/llama.cpp/issues/14877
135
135
 
136
136
  option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
137
137
  set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
@@ -174,6 +174,8 @@ option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental,
174
174
  option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
175
175
  option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
176
176
  option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
177
+ option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
178
+ option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
177
179
  option(GGML_VULKAN "ggml: use Vulkan" OFF)
178
180
  option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
179
181
  option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
@@ -181,7 +183,8 @@ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug ou
181
183
  option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
182
184
  option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
183
185
  option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
184
- option(GGML_KOMPUTE "ggml: use Kompute" OFF)
186
+ option(GGML_WEBGPU "ggml: use WebGPU" OFF)
187
+ option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
185
188
  option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
186
189
  option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
187
190
  option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
@@ -266,12 +269,12 @@ set(GGML_PUBLIC_HEADERS
266
269
  include/ggml-cann.h
267
270
  include/ggml-cpp.h
268
271
  include/ggml-cuda.h
269
- include/ggml-kompute.h
270
272
  include/ggml-opt.h
271
273
  include/ggml-metal.h
272
274
  include/ggml-rpc.h
273
275
  include/ggml-sycl.h
274
276
  include/ggml-vulkan.h
277
+ include/ggml-webgpu.h
275
278
  include/gguf.h)
276
279
 
277
280
  set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
@@ -360,6 +363,13 @@ write_basic_package_version_file(
360
363
  VERSION ${GGML_INSTALL_VERSION}
361
364
  COMPATIBILITY SameMajorVersion)
362
365
 
366
+ target_compile_definitions(ggml-base PRIVATE
367
+ GGML_VERSION="${GGML_INSTALL_VERSION}"
368
+ GGML_COMMIT="${GGML_BUILD_COMMIT}"
369
+ )
370
+ message(STATUS "ggml version: ${GGML_INSTALL_VERSION}")
371
+ message(STATUS "ggml commit: ${GGML_BUILD_COMMIT}")
372
+
363
373
  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
364
374
  ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
365
375
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml)
@@ -1,94 +1,130 @@
1
-
2
- @GGML_VARIABLES_EXPANDED@
3
-
4
1
  @PACKAGE_INIT@
5
2
 
6
- set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@")
7
- set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@")
8
- #set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")
9
-
10
- find_package(Threads REQUIRED)
11
-
12
- find_library(GGML_LIBRARY ggml
13
- REQUIRED
14
- HINTS ${GGML_LIB_DIR}
15
- NO_CMAKE_FIND_ROOT_PATH)
16
-
17
- add_library(ggml::ggml UNKNOWN IMPORTED)
18
- set_target_properties(ggml::ggml
19
- PROPERTIES
20
- IMPORTED_LOCATION "${GGML_LIBRARY}")
21
-
22
- find_library(GGML_BASE_LIBRARY ggml-base
23
- REQUIRED
24
- HINTS ${GGML_LIB_DIR}
25
- NO_CMAKE_FIND_ROOT_PATH)
26
-
27
- add_library(ggml::ggml-base UNKNOWN IMPORTED)
28
- set_target_properties(ggml::ggml-base
29
- PROPERTIES
30
- IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")
3
+ @GGML_VARIABLES_EXPANDED@
31
4
 
5
+ # Find all dependencies before creating any target.
6
+ include(CMakeFindDependencyMacro)
7
+ find_dependency(Threads)
32
8
  if (NOT GGML_SHARED_LIB)
9
+ set(GGML_CPU_INTERFACE_LINK_LIBRARIES "")
10
+ set(GGML_CPU_INTERFACE_LINK_OPTIONS "")
11
+
33
12
  if (APPLE AND GGML_ACCELERATE)
34
- find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
13
+ find_library(ACCELERATE_FRAMEWORK Accelerate)
14
+ if(NOT ACCELERATE_FRAMEWORK)
15
+ set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0)
16
+ return()
17
+ endif()
35
18
  list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${ACCELERATE_FRAMEWORK})
36
19
  endif()
37
20
 
38
- if (GGML_OPENMP)
39
- find_package(OpenMP REQUIRED)
21
+ if (GGML_OPENMP_ENABLED)
22
+ find_dependency(OpenMP)
40
23
  list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
41
24
  endif()
42
25
 
43
26
  if (GGML_CPU_HBM)
44
- find_library(memkind memkind REQUIRED)
27
+ find_library(memkind memkind)
28
+ if(NOT memkind)
29
+ set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0)
30
+ return()
31
+ endif()
45
32
  list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES memkind)
46
33
  endif()
47
34
 
48
35
  if (GGML_BLAS)
49
- find_package(BLAS REQUIRED)
36
+ find_dependency(BLAS)
50
37
  list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${BLAS_LIBRARIES})
51
38
  list(APPEND GGML_CPU_INTERFACE_LINK_OPTIONS ${BLAS_LINKER_FLAGS})
52
39
  endif()
53
40
 
54
41
  if (GGML_CUDA)
55
- find_package(CUDAToolkit REQUIRED)
42
+ set(GGML_CUDA_INTERFACE_LINK_LIBRARIES "")
43
+ find_dependency(CUDAToolkit)
44
+ if (GGML_STATIC)
45
+ list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cudart_static>)
46
+ if (WIN32)
47
+ list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cublas> $<LINK_ONLY:CUDA::cublasLt>)
48
+ else()
49
+ list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cublas_static> $<LINK_ONLY:CUDA::cublasLt_static>)
50
+ endif()
51
+ endif()
52
+ if (NOT GGML_CUDA_NO_VMM)
53
+ list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cuda_driver>)
54
+ endif()
56
55
  endif()
57
56
 
58
57
  if (GGML_METAL)
59
- find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
60
- find_library(METAL_FRAMEWORK Metal REQUIRED)
61
- find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
58
+ find_library(FOUNDATION_LIBRARY Foundation)
59
+ find_library(METAL_FRAMEWORK Metal)
60
+ find_library(METALKIT_FRAMEWORK MetalKit)
61
+ if(NOT FOUNDATION_LIBRARY OR NOT METAL_FRAMEWORK OR NOT METALKIT_FRAMEWORK)
62
+ set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0)
63
+ return()
64
+ endif()
65
+ set(GGML_METAL_INTERFACE_LINK_LIBRARIES
66
+ ${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK})
67
+ endif()
62
68
 
63
- list(APPEND GGML_METAL_INTERFACE_LINK_LIBRARIES
64
- ${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK})
69
+ if (GGML_OPENCL)
70
+ find_dependency(OpenCL)
71
+ set(GGML_OPENCL_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:OpenCL::OpenCL>)
65
72
  endif()
66
73
 
67
74
  if (GGML_VULKAN)
68
- find_package(Vulkan REQUIRED)
69
- list(APPEND GGML_VULKAN_INTERFACE_LINK_LIBRARIES Vulkan::Vulkan)
75
+ find_dependency(Vulkan)
76
+ set(GGML_VULKAN_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:Vulkan::Vulkan>)
70
77
  endif()
71
78
 
72
79
  if (GGML_HIP)
73
- find_package(hip REQUIRED)
74
- find_package(hipblas REQUIRED)
75
- find_package(rocblas REQUIRED)
76
- list(APPEND GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas)
80
+ find_dependency(hip)
81
+ find_dependency(hipblas)
82
+ find_dependency(rocblas)
83
+ set(GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas)
77
84
  endif()
78
85
 
79
86
  if (GGML_SYCL)
87
+ set(GGML_SYCL_INTERFACE_LINK_LIBRARIES "")
80
88
  find_package(DNNL)
81
89
  if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
82
90
  list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl)
83
91
  endif()
84
92
  if (WIN32)
85
- find_package(IntelSYCL REQUIRED)
86
- find_package(MKL REQUIRED)
93
+ find_dependency(IntelSYCL)
94
+ find_dependency(MKL)
87
95
  list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
88
96
  endif()
89
97
  endif()
90
98
  endif()
91
99
 
100
+ set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@")
101
+ set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@")
102
+ #set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")
103
+
104
+ if(NOT TARGET ggml::ggml)
105
+
106
+ find_package(Threads REQUIRED)
107
+
108
+ find_library(GGML_LIBRARY ggml
109
+ REQUIRED
110
+ HINTS ${GGML_LIB_DIR}
111
+ NO_CMAKE_FIND_ROOT_PATH)
112
+
113
+ add_library(ggml::ggml UNKNOWN IMPORTED)
114
+ set_target_properties(ggml::ggml
115
+ PROPERTIES
116
+ IMPORTED_LOCATION "${GGML_LIBRARY}")
117
+
118
+ find_library(GGML_BASE_LIBRARY ggml-base
119
+ REQUIRED
120
+ HINTS ${GGML_LIB_DIR}
121
+ NO_CMAKE_FIND_ROOT_PATH)
122
+
123
+ add_library(ggml::ggml-base UNKNOWN IMPORTED)
124
+ set_target_properties(ggml::ggml-base
125
+ PROPERTIES
126
+ IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")
127
+
92
128
  set(_ggml_all_targets "")
93
129
  foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
94
130
  string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
@@ -149,4 +185,6 @@ set_target_properties(ggml::all
149
185
  PROPERTIES
150
186
  INTERFACE_LINK_LIBRARIES "${_ggml_all_targets}")
151
187
 
188
+ endif() # TARGET ggml::ggml
189
+
152
190
  check_required_components(ggml)
@@ -339,7 +339,7 @@ extern "C" {
339
339
  typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
340
340
 
341
341
  // Compare the output of two backends
342
- GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
342
+ GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node);
343
343
 
344
344
  // Tensor initialization
345
345
  GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
@@ -0,0 +1,19 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ #define GGML_WEBGPU_NAME "WebGPU"
11
+
12
+ // Needed for examples in ggml
13
+ GGML_BACKEND_API ggml_backend_t ggml_backend_webgpu_init(void);
14
+
15
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_webgpu_reg(void);
16
+
17
+ #ifdef __cplusplus
18
+ }
19
+ #endif
@@ -314,6 +314,13 @@
314
314
  extern "C" {
315
315
  #endif
316
316
 
317
+ // Function type used in fatal error callbacks
318
+ typedef void (*ggml_abort_callback_t)(const char * error_message);
319
+
320
+ // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
321
+ // Returns the old callback for chaining
322
+ GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
323
+
317
324
  GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
318
325
  GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
319
326
 
@@ -482,12 +489,13 @@ extern "C" {
482
489
  GGML_OP_CONV_TRANSPOSE_1D,
483
490
  GGML_OP_IM2COL,
484
491
  GGML_OP_IM2COL_BACK,
492
+ GGML_OP_CONV_2D,
485
493
  GGML_OP_CONV_2D_DW,
486
494
  GGML_OP_CONV_TRANSPOSE_2D,
487
495
  GGML_OP_POOL_1D,
488
496
  GGML_OP_POOL_2D,
489
497
  GGML_OP_POOL_2D_BACK,
490
- GGML_OP_UPSCALE, // nearest interpolate
498
+ GGML_OP_UPSCALE,
491
499
  GGML_OP_PAD,
492
500
  GGML_OP_PAD_REFLECT_1D,
493
501
  GGML_OP_ROLL,
@@ -520,6 +528,8 @@ extern "C" {
520
528
  GGML_OP_CROSS_ENTROPY_LOSS_BACK,
521
529
  GGML_OP_OPT_STEP_ADAMW,
522
530
 
531
+ GGML_OP_GLU,
532
+
523
533
  GGML_OP_COUNT,
524
534
  };
525
535
 
@@ -543,6 +553,16 @@ extern "C" {
543
553
  GGML_UNARY_OP_COUNT,
544
554
  };
545
555
 
556
+ enum ggml_glu_op {
557
+ GGML_GLU_OP_REGLU,
558
+ GGML_GLU_OP_GEGLU,
559
+ GGML_GLU_OP_SWIGLU,
560
+ GGML_GLU_OP_GEGLU_ERF,
561
+ GGML_GLU_OP_GEGLU_QUICK,
562
+
563
+ GGML_GLU_OP_COUNT,
564
+ };
565
+
546
566
  enum ggml_object_type {
547
567
  GGML_OBJECT_TYPE_TENSOR,
548
568
  GGML_OBJECT_TYPE_GRAPH,
@@ -628,6 +648,9 @@ extern "C" {
628
648
 
629
649
  // misc
630
650
 
651
+ GGML_API const char * ggml_version(void);
652
+ GGML_API const char * ggml_commit(void);
653
+
631
654
  GGML_API void ggml_time_init(void); // call this once at the beginning of the program
632
655
  GGML_API int64_t ggml_time_ms(void);
633
656
  GGML_API int64_t ggml_time_us(void);
@@ -658,6 +681,7 @@ extern "C" {
658
681
  GGML_API const char * ggml_op_symbol(enum ggml_op op);
659
682
 
660
683
  GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
684
+ GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
661
685
  GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
662
686
 
663
687
  GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
@@ -762,6 +786,7 @@ extern "C" {
762
786
  GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
763
787
 
764
788
  GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
789
+ GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
765
790
 
766
791
  GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
767
792
  GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
@@ -1090,6 +1115,89 @@ extern "C" {
1090
1115
  struct ggml_context * ctx,
1091
1116
  struct ggml_tensor * a);
1092
1117
 
1118
+ // gated linear unit ops
1119
+ // A: n columns, r rows,
1120
+ // result is n / 2 columns, r rows,
1121
+ // expects gate in second half of row, unless swapped is true
1122
+ GGML_API struct ggml_tensor * ggml_glu(
1123
+ struct ggml_context * ctx,
1124
+ struct ggml_tensor * a,
1125
+ enum ggml_glu_op op,
1126
+ bool swapped);
1127
+
1128
+ GGML_API struct ggml_tensor * ggml_reglu(
1129
+ struct ggml_context * ctx,
1130
+ struct ggml_tensor * a);
1131
+
1132
+ GGML_API struct ggml_tensor * ggml_reglu_swapped(
1133
+ struct ggml_context * ctx,
1134
+ struct ggml_tensor * a);
1135
+
1136
+ GGML_API struct ggml_tensor * ggml_geglu(
1137
+ struct ggml_context * ctx,
1138
+ struct ggml_tensor * a);
1139
+
1140
+ GGML_API struct ggml_tensor * ggml_geglu_swapped(
1141
+ struct ggml_context * ctx,
1142
+ struct ggml_tensor * a);
1143
+
1144
+ GGML_API struct ggml_tensor * ggml_swiglu(
1145
+ struct ggml_context * ctx,
1146
+ struct ggml_tensor * a);
1147
+
1148
+ GGML_API struct ggml_tensor * ggml_swiglu_swapped(
1149
+ struct ggml_context * ctx,
1150
+ struct ggml_tensor * a);
1151
+
1152
+ GGML_API struct ggml_tensor * ggml_geglu_erf(
1153
+ struct ggml_context * ctx,
1154
+ struct ggml_tensor * a);
1155
+
1156
+ GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
1157
+ struct ggml_context * ctx,
1158
+ struct ggml_tensor * a);
1159
+
1160
+ GGML_API struct ggml_tensor * ggml_geglu_quick(
1161
+ struct ggml_context * ctx,
1162
+ struct ggml_tensor * a);
1163
+
1164
+ GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
1165
+ struct ggml_context * ctx,
1166
+ struct ggml_tensor * a);
1167
+
1168
+ // A: n columns, r rows,
1169
+ // B: n columns, r rows,
1170
+ GGML_API struct ggml_tensor * ggml_glu_split(
1171
+ struct ggml_context * ctx,
1172
+ struct ggml_tensor * a,
1173
+ struct ggml_tensor * b,
1174
+ enum ggml_glu_op op);
1175
+
1176
+ GGML_API struct ggml_tensor * ggml_reglu_split(
1177
+ struct ggml_context * ctx,
1178
+ struct ggml_tensor * a,
1179
+ struct ggml_tensor * b);
1180
+
1181
+ GGML_API struct ggml_tensor * ggml_geglu_split(
1182
+ struct ggml_context * ctx,
1183
+ struct ggml_tensor * a,
1184
+ struct ggml_tensor * b);
1185
+
1186
+ GGML_API struct ggml_tensor * ggml_swiglu_split(
1187
+ struct ggml_context * ctx,
1188
+ struct ggml_tensor * a,
1189
+ struct ggml_tensor * b);
1190
+
1191
+ GGML_API struct ggml_tensor * ggml_geglu_erf_split(
1192
+ struct ggml_context * ctx,
1193
+ struct ggml_tensor * a,
1194
+ struct ggml_tensor * b);
1195
+
1196
+ GGML_API struct ggml_tensor * ggml_geglu_quick_split(
1197
+ struct ggml_context * ctx,
1198
+ struct ggml_tensor * a,
1199
+ struct ggml_tensor * b);
1200
+
1093
1201
  // normalize along rows
1094
1202
  GGML_API struct ggml_tensor * ggml_norm(
1095
1203
  struct ggml_context * ctx,
@@ -1189,6 +1297,19 @@ extern "C" {
1189
1297
  struct ggml_tensor * a,
1190
1298
  float s);
1191
1299
 
1300
+ // x = s * a + b
1301
+ GGML_API struct ggml_tensor * ggml_scale_bias(
1302
+ struct ggml_context * ctx,
1303
+ struct ggml_tensor * a,
1304
+ float s,
1305
+ float b);
1306
+
1307
+ GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
1308
+ struct ggml_context * ctx,
1309
+ struct ggml_tensor * a,
1310
+ float s,
1311
+ float b);
1312
+
1192
1313
  // b -> view(a,offset,nb1,nb2,3), return modified a
1193
1314
  GGML_API struct ggml_tensor * ggml_set(
1194
1315
  struct ggml_context * ctx,
@@ -1433,8 +1554,14 @@ extern "C" {
1433
1554
  struct ggml_context * ctx,
1434
1555
  struct ggml_tensor * a);
1435
1556
 
1557
+ // a [ne0, ne01, ne02, ne03]
1558
+ // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
1559
+ //
1560
+ // broadcast:
1561
+ // ne02 % ne12 == 0
1562
+ // ne03 % ne13 == 0
1563
+ //
1436
1564
  // fused soft_max(a*scale + mask*(ALiBi slope))
1437
- // mask is optional
1438
1565
  // max_bias = 0.0f for no ALiBi
1439
1566
  GGML_API struct ggml_tensor * ggml_soft_max_ext(
1440
1567
  struct ggml_context * ctx,
@@ -1744,6 +1871,17 @@ extern "C" {
1744
1871
  struct ggml_tensor * b,
1745
1872
  int stride);
1746
1873
 
1874
+ GGML_API struct ggml_tensor * ggml_conv_2d_direct(
1875
+ struct ggml_context * ctx,
1876
+ struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
1877
+ struct ggml_tensor * b, // input data [W, H, C, N]
1878
+ int s0, // stride dimension 0
1879
+ int s1, // stride dimension 1
1880
+ int p0, // padding dimension 0
1881
+ int p1, // padding dimension 1
1882
+ int d0, // dilation dimension 0
1883
+ int d1); // dilation dimension 1
1884
+
1747
1885
  enum ggml_op_pool {
1748
1886
  GGML_OP_POOL_MAX,
1749
1887
  GGML_OP_POOL_AVG,
@@ -1786,6 +1924,12 @@ extern "C" {
1786
1924
  enum ggml_scale_mode {
1787
1925
  GGML_SCALE_MODE_NEAREST = 0,
1788
1926
  GGML_SCALE_MODE_BILINEAR = 1,
1927
+
1928
+ GGML_SCALE_MODE_COUNT
1929
+ };
1930
+
1931
+ enum ggml_scale_flag {
1932
+ GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
1789
1933
  };
1790
1934
 
1791
1935
  // interpolate
@@ -1798,14 +1942,26 @@ extern "C" {
1798
1942
 
1799
1943
  // interpolate
1800
1944
  // interpolate scale to specified dimensions
1801
- GGML_API struct ggml_tensor * ggml_upscale_ext(
1945
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
1802
1946
  struct ggml_context * ctx,
1803
1947
  struct ggml_tensor * a,
1804
1948
  int ne0,
1805
1949
  int ne1,
1806
1950
  int ne2,
1807
1951
  int ne3,
1808
- enum ggml_scale_mode mode);
1952
+ enum ggml_scale_mode mode),
1953
+ "use ggml_interpolate instead");
1954
+
1955
+ // Up- or downsamples the input to the specified size.
1956
+ // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
1957
+ GGML_API struct ggml_tensor * ggml_interpolate(
1958
+ struct ggml_context * ctx,
1959
+ struct ggml_tensor * a,
1960
+ int64_t ne0,
1961
+ int64_t ne1,
1962
+ int64_t ne2,
1963
+ int64_t ne3,
1964
+ uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
1809
1965
 
1810
1966
  // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
1811
1967
  GGML_API struct ggml_tensor * ggml_pad(
@@ -1868,11 +2024,17 @@ extern "C" {
1868
2024
 
1869
2025
  #define GGML_KQ_MASK_PAD 64
1870
2026
 
1871
- // q: [n_embd_k, n_batch, n_head, 1]
1872
- // k: [n_embd_k, n_kv, n_head_kv, 1]
1873
- // v: [n_embd_v, n_kv, n_head_kv, 1] !! not transposed !!
1874
- // mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
1875
- // res: [n_embd_v, n_head, n_batch, 1] !! permuted !!
2027
+ // q: [n_embd_k, n_batch, n_head, ne3 ]
2028
+ // k: [n_embd_k, n_kv, n_head_kv, ne3 ]
2029
+ // v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
2030
+ // mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
2031
+ // res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
2032
+ //
2033
+ // broadcast:
2034
+ // n_head % n_head_kv == 0
2035
+ // n_head % ne32 == 0
2036
+ // ne3 % ne33 == 0
2037
+ //
1876
2038
  GGML_API struct ggml_tensor * ggml_flash_attn_ext(
1877
2039
  struct ggml_context * ctx,
1878
2040
  struct ggml_tensor * q,
@@ -1911,7 +2073,8 @@ extern "C" {
1911
2073
  struct ggml_tensor * dt,
1912
2074
  struct ggml_tensor * A,
1913
2075
  struct ggml_tensor * B,
1914
- struct ggml_tensor * C);
2076
+ struct ggml_tensor * C,
2077
+ struct ggml_tensor * ids);
1915
2078
 
1916
2079
  // partition into non-overlapping windows with padding if needed
1917
2080
  // example:
@@ -365,12 +365,12 @@ ggml_add_backend(BLAS)
365
365
  ggml_add_backend(CANN)
366
366
  ggml_add_backend(CUDA)
367
367
  ggml_add_backend(HIP)
368
- ggml_add_backend(Kompute)
369
368
  ggml_add_backend(METAL)
370
369
  ggml_add_backend(MUSA)
371
370
  ggml_add_backend(RPC)
372
371
  ggml_add_backend(SYCL)
373
372
  ggml_add_backend(Vulkan)
373
+ ggml_add_backend(WebGPU)
374
374
  ggml_add_backend(OpenCL)
375
375
 
376
376
  foreach (target ggml-base ggml)