@novastera-oss/llamarn 0.4.0 → 0.4.3-beta4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (979)
  1. package/RNLlamaCpp.podspec +4 -1
  2. package/android/CMakeLists.txt +13 -3
  3. package/android/src/main/cpp/include/llama.h +44 -21
  4. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  13. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  14. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  15. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  20. package/cpp/LlamaCppModel.cpp +2 -10
  21. package/cpp/SystemUtils.cpp +3 -7
  22. package/cpp/build-info.cpp +2 -2
  23. package/cpp/llama.cpp/CMakeLists.txt +12 -0
  24. package/cpp/llama.cpp/CODEOWNERS +116 -10
  25. package/cpp/llama.cpp/CONTRIBUTING.md +30 -3
  26. package/cpp/llama.cpp/README.md +13 -5
  27. package/cpp/llama.cpp/build-xcframework.sh +5 -0
  28. package/cpp/llama.cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
  29. package/cpp/llama.cpp/common/CMakeLists.txt +12 -2
  30. package/cpp/llama.cpp/common/arg.cpp +303 -795
  31. package/cpp/llama.cpp/common/arg.h +2 -3
  32. package/cpp/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
  33. package/cpp/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
  34. package/cpp/llama.cpp/common/chat-parser.cpp +156 -15
  35. package/cpp/llama.cpp/common/chat-parser.h +13 -0
  36. package/cpp/llama.cpp/common/chat.cpp +1147 -88
  37. package/cpp/llama.cpp/common/chat.h +16 -3
  38. package/cpp/llama.cpp/common/common.cpp +70 -15
  39. package/cpp/llama.cpp/common/common.h +57 -19
  40. package/cpp/llama.cpp/common/download.cpp +1072 -0
  41. package/cpp/llama.cpp/common/download.h +55 -0
  42. package/cpp/llama.cpp/common/http.h +73 -0
  43. package/cpp/llama.cpp/common/json-partial.cpp +70 -2
  44. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +61 -22
  45. package/cpp/llama.cpp/common/json-schema-to-grammar.h +2 -0
  46. package/cpp/llama.cpp/common/log.cpp +59 -2
  47. package/cpp/llama.cpp/common/log.h +12 -4
  48. package/cpp/llama.cpp/common/sampling.cpp +84 -8
  49. package/cpp/llama.cpp/common/sampling.h +3 -1
  50. package/cpp/llama.cpp/common/speculative.cpp +1 -1
  51. package/cpp/llama.cpp/convert_hf_to_gguf.py +1608 -233
  52. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +6 -1
  53. package/cpp/llama.cpp/convert_lora_to_gguf.py +37 -5
  54. package/cpp/llama.cpp/ggml/CMakeLists.txt +47 -28
  55. package/cpp/llama.cpp/ggml/include/ggml-backend.h +19 -1
  56. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +1 -1
  57. package/cpp/llama.cpp/ggml/include/ggml-hexagon.h +19 -0
  58. package/cpp/llama.cpp/ggml/include/ggml-metal.h +1 -6
  59. package/cpp/llama.cpp/ggml/include/ggml-rpc.h +7 -9
  60. package/cpp/llama.cpp/ggml/include/ggml-zdnn.h +2 -1
  61. package/cpp/llama.cpp/ggml/include/ggml.h +199 -6
  62. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +38 -0
  63. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +299 -130
  64. package/cpp/llama.cpp/ggml/src/ggml-backend-impl.h +4 -4
  65. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +21 -5
  66. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +99 -2
  67. package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +1 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +1 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
  70. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +138 -47
  71. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +1584 -1773
  72. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +201 -317
  73. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +146 -187
  74. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +771 -713
  75. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +135 -77
  76. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +5 -2
  77. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
  78. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +16 -17
  79. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +318 -145
  80. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +155 -60
  82. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +8 -8
  83. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +0 -1
  84. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +14 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +10 -9
  86. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +108 -64
  87. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +14 -4
  88. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +530 -87
  89. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +37 -45
  90. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +349 -127
  91. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +947 -1218
  92. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +5 -4
  93. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +143 -29
  94. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +82 -76
  95. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
  100. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +7 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +233 -28
  102. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +326 -66
  103. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +12 -3
  104. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cu +102 -6
  105. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cu +110 -76
  106. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +167 -38
  107. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d.cu +6 -11
  108. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +12 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
  110. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +245 -151
  111. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cuh +1 -5
  112. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +341 -289
  113. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cu +49 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cuh +1233 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec.cuh +586 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +6 -6
  117. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +123 -220
  119. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +41 -39
  120. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +715 -45
  121. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +150 -0
  122. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cuh +1 -0
  123. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +321 -24
  124. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmf.cu +93 -351
  125. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmf.cuh +828 -1
  126. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmid.cu +164 -0
  127. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmid.cuh +5 -0
  128. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +3 -166
  129. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1 -1
  130. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvf.cu +371 -78
  131. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvf.cuh +3 -2
  132. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cu +279 -147
  133. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh +1 -1
  134. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +97 -85
  135. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cu +46 -23
  136. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad_reflect_1d.cu +63 -54
  137. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +12 -10
  138. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +192 -77
  139. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cuh +2 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +10 -9
  141. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +137 -75
  142. package/cpp/llama.cpp/ggml/src/ggml-cuda/set.cu +39 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-cuda/set.cuh +7 -0
  144. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  152. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  153. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
  154. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
  161. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
  164. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
  166. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
  167. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
  173. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
  174. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
  175. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
  176. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
  177. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
  178. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
  179. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
  180. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
  181. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
  182. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
  183. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
  184. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
  185. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
  186. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
  187. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
  188. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
  189. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +40 -19
  190. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
  191. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
  192. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
  193. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
  194. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
  195. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
  196. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
  197. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
  198. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
  199. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
  200. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
  201. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
  202. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
  203. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
  204. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
  205. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
  206. package/cpp/llama.cpp/ggml/src/ggml-cuda/topk-moe.cu +336 -0
  207. package/cpp/llama.cpp/ggml/src/ggml-cuda/topk-moe.cuh +16 -0
  208. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cu +3 -3
  209. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +105 -11
  210. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +36 -0
  211. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +87 -6
  212. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +28 -12
  213. package/cpp/llama.cpp/ggml/src/ggml-hexagon/CMakeLists.txt +68 -0
  214. package/cpp/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3807 -0
  215. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +40 -0
  216. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/act-ops.c +442 -0
  217. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
  218. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  219. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +40 -0
  220. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.c +69 -0
  221. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.h +119 -0
  222. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +156 -0
  223. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +64 -0
  224. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  225. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-exp.c +93 -0
  226. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.c +60 -0
  227. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
  228. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.c +960 -0
  229. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +1032 -0
  230. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/main.c +829 -0
  231. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2223 -0
  232. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
  233. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +418 -0
  234. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
  235. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +255 -0
  236. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
  237. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  238. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.c +448 -0
  239. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.h +220 -0
  240. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +8 -13
  241. package/cpp/llama.cpp/ggml/src/ggml-impl.h +110 -12
  242. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +6 -5
  243. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
  244. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  245. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
  246. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.m +599 -0
  247. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1662 -0
  248. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.h +251 -0
  249. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.m +1527 -0
  250. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +244 -39
  251. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +3844 -0
  252. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.h +90 -0
  253. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.cpp +723 -0
  254. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +3453 -1907
  255. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +3 -1
  256. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +10 -0
  257. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1331 -109
  258. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +126 -0
  259. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +31 -4
  260. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +35 -7
  261. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +31 -4
  262. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  263. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  264. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
  265. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  266. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
  267. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
  268. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  269. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  270. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  271. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  272. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  273. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  274. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  275. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
  276. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
  277. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
  278. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +123 -10
  279. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +2 -2
  280. package/cpp/llama.cpp/ggml/src/ggml-quants.c +1 -0
  281. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +341 -161
  282. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
  283. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +6 -5
  284. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +74 -15
  285. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +50 -30
  286. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +10 -4
  287. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +166 -99
  288. package/cpp/llama.cpp/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  289. package/cpp/llama.cpp/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  290. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +72 -94
  291. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +67 -49
  292. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +21 -31
  293. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +252 -316
  294. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +6 -2
  295. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +9 -6
  296. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +359 -142
  297. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  298. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  299. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +80 -60
  300. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +201 -132
  301. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +230 -55
  302. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.hpp +2 -0
  303. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad.cpp +97 -0
  304. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad.hpp +24 -0
  305. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.cpp +72 -0
  306. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.hpp +8 -0
  307. package/cpp/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
  308. package/cpp/llama.cpp/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  309. package/cpp/llama.cpp/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  310. package/cpp/llama.cpp/ggml/src/ggml-sycl/roll.cpp +122 -0
  311. package/cpp/llama.cpp/ggml/src/ggml-sycl/roll.hpp +20 -0
  312. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +50 -41
  313. package/cpp/llama.cpp/ggml/src/ggml-sycl/set.cpp +73 -0
  314. package/cpp/llama.cpp/ggml/src/ggml-sycl/set.hpp +5 -0
  315. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +45 -36
  316. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +330 -165
  317. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +4 -0
  318. package/cpp/llama.cpp/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
  319. package/cpp/llama.cpp/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  320. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +12 -6
  321. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +16 -12
  322. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
  323. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4184 -2159
  324. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  325. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
  326. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
  327. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  328. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
  329. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  330. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
  331. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
  332. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  333. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  334. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
  335. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
  336. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
  337. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
  338. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +53 -30
  339. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
  340. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
  341. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
  342. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +13 -6
  343. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  344. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
  345. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
  346. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
  347. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +138 -2
  348. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
  349. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
  350. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  351. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
  352. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +2 -2
  353. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
  354. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +3 -2
  355. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +7 -6
  356. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +5 -3
  357. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
  358. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
  359. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
  360. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
  361. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  362. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
  363. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
  364. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
  365. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
  366. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
  367. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
  368. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  369. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
  370. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
  371. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
  372. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -2
  373. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  374. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +52 -14
  375. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +50 -12
  376. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +61 -12
  377. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +54 -12
  378. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +5 -1
  379. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  380. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
  381. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
  382. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
  383. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
  384. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
  385. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
  386. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +10 -2
  387. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +21 -12
  388. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +28 -18
  389. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
  390. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
  391. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
  392. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
  393. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +15 -7
  394. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +125 -0
  395. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
  396. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
  397. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
  398. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
  399. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +1 -1
  400. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +229 -0
  401. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +33 -0
  402. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +1 -1
  403. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +1 -1
  404. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +1 -1
  405. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +1 -1
  406. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +1 -1
  407. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +1 -1
  408. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +1 -1
  409. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
  410. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
  411. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +3 -5
  412. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +1 -1
  413. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +3 -5
  414. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +3 -5
  415. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +1 -1
  416. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
  417. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +106 -634
  418. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +118 -9
  419. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +556 -0
  420. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +70 -0
  421. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +77 -214
  422. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +589 -0
  423. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
  424. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
  425. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  426. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
  427. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
  428. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
  429. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +25 -4
  430. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
  431. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +55 -5
  432. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
  433. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
  434. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
  435. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
  436. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +45 -3
  437. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
  438. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
  439. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
  440. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +227 -0
  441. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
  442. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +5 -52
  443. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +5 -35
  444. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +5 -35
  445. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +27 -0
  446. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +5 -41
  447. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  448. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
  449. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
  450. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
  451. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
  452. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
  453. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
  454. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +6 -2
  455. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  456. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
  457. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
  458. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
  459. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +140 -0
  460. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  461. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
  462. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +1 -1
  463. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
  464. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
  465. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
  466. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +5 -4
  467. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +171 -0
  468. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  469. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +79 -29
  470. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -12
  471. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +471 -196
  472. package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +8 -0
  473. package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1690 -383
  474. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
  475. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
  476. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
  477. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
  478. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +57 -10
  479. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
  480. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  481. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +25 -912
  482. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
  483. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
  484. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
  485. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
  486. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  487. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  488. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
  489. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/{set_rows.wgsl → set_rows.tmpl.wgsl} +38 -8
  490. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  491. package/cpp/llama.cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  492. package/cpp/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +96 -314
  493. package/cpp/llama.cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  494. package/cpp/llama.cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  495. package/cpp/llama.cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  496. package/cpp/llama.cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  497. package/cpp/llama.cpp/ggml/src/ggml.c +440 -17
  498. package/cpp/llama.cpp/ggml/src/gguf.cpp +104 -29
  499. package/cpp/llama.cpp/gguf-py/gguf/constants.py +363 -13
  500. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +64 -0
  501. package/cpp/llama.cpp/gguf-py/gguf/lazy.py +8 -3
  502. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_convert_endian.py +6 -0
  503. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +156 -18
  504. package/cpp/llama.cpp/gguf-py/gguf/utility.py +80 -0
  505. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +4 -4
  506. package/cpp/llama.cpp/include/llama.h +44 -21
  507. package/cpp/llama.cpp/media/llama1-icon-transparent.png +0 -0
  508. package/cpp/llama.cpp/media/llama1-icon-transparent.svg +77 -0
  509. package/cpp/llama.cpp/media/llama1-icon.png +0 -0
  510. package/cpp/llama.cpp/media/llama1-icon.svg +87 -0
  511. package/cpp/llama.cpp/requirements/requirements-all.txt +2 -0
  512. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -3
  513. package/cpp/llama.cpp/requirements/requirements-convert_legacy_llama.txt +3 -1
  514. package/cpp/llama.cpp/requirements/requirements-tool_bench.txt +1 -1
  515. package/cpp/llama.cpp/src/CMakeLists.txt +101 -0
  516. package/cpp/llama.cpp/src/llama-adapter.cpp +33 -0
  517. package/cpp/llama.cpp/src/llama-adapter.h +3 -0
  518. package/cpp/llama.cpp/src/llama-arch.cpp +344 -14
  519. package/cpp/llama.cpp/src/llama-arch.h +50 -0
  520. package/cpp/llama.cpp/src/llama-batch.cpp +63 -31
  521. package/cpp/llama.cpp/src/llama-batch.h +13 -2
  522. package/cpp/llama.cpp/src/llama-chat.cpp +85 -3
  523. package/cpp/llama.cpp/src/llama-chat.h +4 -0
  524. package/cpp/llama.cpp/src/llama-context.cpp +300 -45
  525. package/cpp/llama.cpp/src/llama-context.h +16 -6
  526. package/cpp/llama.cpp/src/llama-cparams.h +2 -1
  527. package/cpp/llama.cpp/src/llama-grammar.cpp +17 -9
  528. package/cpp/llama.cpp/src/llama-graph.cpp +226 -64
  529. package/cpp/llama.cpp/src/llama-graph.h +27 -5
  530. package/cpp/llama.cpp/src/llama-hparams.cpp +53 -2
  531. package/cpp/llama.cpp/src/llama-hparams.h +48 -8
  532. package/cpp/llama.cpp/src/llama-impl.cpp +3 -3
  533. package/cpp/llama.cpp/src/llama-impl.h +2 -0
  534. package/cpp/llama.cpp/src/llama-kv-cache-iswa.cpp +13 -3
  535. package/cpp/llama.cpp/src/llama-kv-cache-iswa.h +2 -0
  536. package/cpp/llama.cpp/src/llama-kv-cache.cpp +120 -62
  537. package/cpp/llama.cpp/src/llama-kv-cache.h +13 -4
  538. package/cpp/llama.cpp/src/llama-kv-cells.h +44 -2
  539. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +19 -9
  540. package/cpp/llama.cpp/src/llama-memory-hybrid.h +2 -0
  541. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +38 -17
  542. package/cpp/llama.cpp/src/llama-memory-recurrent.h +5 -2
  543. package/cpp/llama.cpp/src/llama-memory.h +3 -0
  544. package/cpp/llama.cpp/src/llama-model-loader.cpp +2 -0
  545. package/cpp/llama.cpp/src/llama-model.cpp +1070 -12614
  546. package/cpp/llama.cpp/src/llama-model.h +40 -4
  547. package/cpp/llama.cpp/src/llama-quant.cpp +14 -6
  548. package/cpp/llama.cpp/src/llama-sampling.cpp +243 -136
  549. package/cpp/llama.cpp/src/llama-vocab.cpp +43 -3
  550. package/cpp/llama.cpp/src/llama-vocab.h +43 -39
  551. package/cpp/llama.cpp/src/llama.cpp +69 -10
  552. package/cpp/llama.cpp/src/models/afmoe.cpp +187 -0
  553. package/cpp/llama.cpp/src/models/apertus.cpp +125 -0
  554. package/cpp/llama.cpp/src/models/arcee.cpp +135 -0
  555. package/cpp/llama.cpp/src/models/arctic.cpp +138 -0
  556. package/cpp/llama.cpp/src/models/arwkv7.cpp +86 -0
  557. package/cpp/llama.cpp/src/models/baichuan.cpp +122 -0
  558. package/cpp/llama.cpp/src/models/bailingmoe.cpp +144 -0
  559. package/cpp/llama.cpp/src/models/bailingmoe2.cpp +135 -0
  560. package/cpp/llama.cpp/src/models/bert.cpp +176 -0
  561. package/cpp/llama.cpp/src/models/bitnet.cpp +160 -0
  562. package/cpp/llama.cpp/src/models/bloom.cpp +101 -0
  563. package/cpp/llama.cpp/src/models/chameleon.cpp +178 -0
  564. package/cpp/llama.cpp/src/models/chatglm.cpp +132 -0
  565. package/cpp/llama.cpp/src/models/codeshell.cpp +111 -0
  566. package/cpp/llama.cpp/src/models/cogvlm.cpp +100 -0
  567. package/cpp/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
  568. package/cpp/llama.cpp/src/models/command-r.cpp +122 -0
  569. package/cpp/llama.cpp/src/models/dbrx.cpp +123 -0
  570. package/cpp/llama.cpp/src/models/deci.cpp +135 -0
  571. package/cpp/llama.cpp/src/models/deepseek.cpp +144 -0
  572. package/cpp/llama.cpp/src/models/deepseek2.cpp +237 -0
  573. package/cpp/llama.cpp/src/models/dots1.cpp +134 -0
  574. package/cpp/llama.cpp/src/models/dream.cpp +105 -0
  575. package/cpp/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
  576. package/cpp/llama.cpp/src/models/ernie4-5.cpp +110 -0
  577. package/cpp/llama.cpp/src/models/exaone.cpp +114 -0
  578. package/cpp/llama.cpp/src/models/exaone4.cpp +123 -0
  579. package/cpp/llama.cpp/src/models/falcon-h1.cpp +113 -0
  580. package/cpp/llama.cpp/src/models/falcon.cpp +120 -0
  581. package/cpp/llama.cpp/src/models/gemma-embedding.cpp +120 -0
  582. package/cpp/llama.cpp/src/models/gemma.cpp +112 -0
  583. package/cpp/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
  584. package/cpp/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
  585. package/cpp/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
  586. package/cpp/llama.cpp/src/models/glm4-moe.cpp +153 -0
  587. package/cpp/llama.cpp/src/models/glm4.cpp +127 -0
  588. package/cpp/llama.cpp/src/models/gpt2.cpp +105 -0
  589. package/cpp/llama.cpp/src/models/gptneox.cpp +144 -0
  590. package/cpp/llama.cpp/src/models/granite-hybrid.cpp +196 -0
  591. package/cpp/llama.cpp/src/models/granite.cpp +211 -0
  592. package/cpp/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
  593. package/cpp/llama.cpp/src/models/grok.cpp +159 -0
  594. package/cpp/llama.cpp/src/models/grovemoe.cpp +141 -0
  595. package/cpp/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
  596. package/cpp/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
  597. package/cpp/llama.cpp/src/models/internlm2.cpp +120 -0
  598. package/cpp/llama.cpp/src/models/jais.cpp +86 -0
  599. package/cpp/llama.cpp/src/models/jamba.cpp +106 -0
  600. package/cpp/llama.cpp/src/models/lfm2.cpp +173 -0
  601. package/cpp/llama.cpp/src/models/llada-moe.cpp +122 -0
  602. package/cpp/llama.cpp/src/models/llada.cpp +99 -0
  603. package/cpp/llama.cpp/src/models/llama-iswa.cpp +174 -0
  604. package/cpp/llama.cpp/src/models/llama.cpp +155 -0
  605. package/cpp/llama.cpp/src/models/mamba.cpp +55 -0
  606. package/cpp/llama.cpp/src/models/minicpm3.cpp +199 -0
  607. package/cpp/llama.cpp/src/models/minimax-m2.cpp +124 -0
  608. package/cpp/llama.cpp/src/models/models.h +485 -0
  609. package/cpp/llama.cpp/src/models/mpt.cpp +126 -0
  610. package/cpp/llama.cpp/src/models/nemotron-h.cpp +121 -0
  611. package/cpp/llama.cpp/src/models/nemotron.cpp +122 -0
  612. package/cpp/llama.cpp/src/models/neo-bert.cpp +104 -0
  613. package/cpp/llama.cpp/src/models/olmo.cpp +121 -0
  614. package/cpp/llama.cpp/src/models/olmo2.cpp +150 -0
  615. package/cpp/llama.cpp/src/models/olmoe.cpp +124 -0
  616. package/cpp/llama.cpp/src/models/openai-moe-iswa.cpp +124 -0
  617. package/cpp/llama.cpp/src/models/openelm.cpp +124 -0
  618. package/cpp/llama.cpp/src/models/orion.cpp +123 -0
  619. package/cpp/llama.cpp/src/models/pangu-embedded.cpp +121 -0
  620. package/cpp/llama.cpp/src/models/phi2.cpp +121 -0
  621. package/cpp/llama.cpp/src/models/phi3.cpp +152 -0
  622. package/cpp/llama.cpp/src/models/plamo.cpp +110 -0
  623. package/cpp/llama.cpp/src/models/plamo2.cpp +316 -0
  624. package/cpp/llama.cpp/src/models/plm.cpp +168 -0
  625. package/cpp/llama.cpp/src/models/qwen.cpp +108 -0
  626. package/cpp/llama.cpp/src/models/qwen2.cpp +117 -0
  627. package/cpp/llama.cpp/src/models/qwen2moe.cpp +151 -0
  628. package/cpp/llama.cpp/src/models/qwen2vl.cpp +117 -0
  629. package/cpp/llama.cpp/src/models/qwen3.cpp +117 -0
  630. package/cpp/llama.cpp/src/models/qwen3moe.cpp +124 -0
  631. package/cpp/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
  632. package/cpp/llama.cpp/src/models/qwen3vl.cpp +141 -0
  633. package/cpp/llama.cpp/src/models/refact.cpp +94 -0
  634. package/cpp/llama.cpp/src/models/rwkv6-base.cpp +162 -0
  635. package/cpp/llama.cpp/src/models/rwkv6.cpp +94 -0
  636. package/cpp/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
  637. package/cpp/llama.cpp/src/models/rwkv7-base.cpp +135 -0
  638. package/cpp/llama.cpp/src/models/rwkv7.cpp +90 -0
  639. package/cpp/llama.cpp/src/models/seed-oss.cpp +124 -0
  640. package/cpp/llama.cpp/src/models/smallthinker.cpp +120 -0
  641. package/cpp/llama.cpp/src/models/smollm3.cpp +128 -0
  642. package/cpp/llama.cpp/src/models/stablelm.cpp +146 -0
  643. package/cpp/llama.cpp/src/models/starcoder.cpp +100 -0
  644. package/cpp/llama.cpp/src/models/starcoder2.cpp +121 -0
  645. package/cpp/llama.cpp/src/models/t5-dec.cpp +166 -0
  646. package/cpp/llama.cpp/src/models/t5-enc.cpp +96 -0
  647. package/cpp/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
  648. package/cpp/llama.cpp/src/models/xverse.cpp +108 -0
  649. package/cpp/llama.cpp/src/unicode.cpp +77 -0
  650. package/cpp/llama.cpp/src/unicode.h +43 -0
  651. package/cpp/llama.cpp/vendor/cpp-httplib/CMakeLists.txt +94 -0
  652. package/cpp/llama.cpp/vendor/cpp-httplib/httplib.cpp +9339 -0
  653. package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +433 -8222
  654. package/cpp/llama.cpp/vendor/cpp-httplib/patch-boringssl.cmake +6 -0
  655. package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +4179 -1900
  656. package/cpp/llama.cpp/vendor/minja/chat-template.hpp +9 -2
  657. package/cpp/llama.cpp/vendor/minja/minja.hpp +101 -22
  658. package/cpp/rn-completion.cpp +3 -27
  659. package/ios/include/chat.h +16 -3
  660. package/ios/include/common/minja/chat-template.hpp +9 -2
  661. package/ios/include/common/minja/minja.hpp +101 -22
  662. package/ios/include/common.h +57 -19
  663. package/ios/include/json-schema-to-grammar.h +2 -0
  664. package/ios/include/llama.h +44 -21
  665. package/ios/include/log.h +12 -4
  666. package/ios/include/sampling.h +3 -1
  667. package/ios/libs/llama.xcframework/Info.plist +20 -20
  668. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  669. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6399 -5557
  670. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +19 -1
  671. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +1 -1
  672. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-metal.h +1 -6
  673. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +199 -6
  674. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +44 -21
  675. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  676. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  677. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6362 -5520
  678. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4813 -4241
  679. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +19 -1
  680. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +1 -1
  681. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +1 -6
  682. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +199 -6
  683. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +44 -21
  684. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  685. package/package.json +10 -4
  686. package/cpp/llama.cpp/ggml/src/ggml-cann/Doxyfile +0 -2579
  687. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -371
  688. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
  689. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -379
  690. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
  691. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -495
  692. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -486
  693. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  694. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  695. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  696. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  697. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  698. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  699. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  700. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  701. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  702. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  703. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  704. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  705. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  706. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  707. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  708. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  709. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  710. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  711. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  712. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  713. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  714. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  715. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  716. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  717. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  718. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  719. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  720. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  721. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  722. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  723. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  724. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  725. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  726. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  727. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  728. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  729. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  730. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  731. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  732. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  733. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  734. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  735. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  736. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  737. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  738. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  739. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  740. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  741. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  742. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  743. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  744. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  745. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  746. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  747. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  748. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  749. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  750. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  751. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  752. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  753. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  754. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  755. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  756. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  757. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  758. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  759. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  760. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  761. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  762. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  763. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  764. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  765. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  766. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  767. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  768. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  769. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  770. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  771. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  772. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  773. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  774. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  775. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  776. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  777. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  778. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  779. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +0 -6886
  780. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +0 -154
  781. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
  782. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
  783. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
  784. package/cpp/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn-impl.h +0 -97
  785. package/cpp/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  786. package/cpp/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  787. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  788. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  789. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  790. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  791. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  792. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  793. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  794. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  795. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  796. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  797. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  798. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  799. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  800. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  801. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  802. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  803. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  804. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  805. package/cpp/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  806. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  807. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  808. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  809. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  810. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  811. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  812. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  813. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  814. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  815. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  816. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  817. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  818. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  819. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  820. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  821. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  822. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  823. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  824. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  825. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  826. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  827. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  828. package/cpp/llama.cpp/models/templates/ByteDance-Seed-OSS.jinja +0 -171
  829. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +0 -202
  830. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +0 -156
  831. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +0 -124
  832. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +0 -152
  833. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +0 -152
  834. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +0 -62
  835. package/cpp/llama.cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +0 -54
  836. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +0 -85
  837. package/cpp/llama.cpp/models/templates/README.md +0 -25
  838. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +0 -1
  839. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +0 -1
  840. package/cpp/llama.cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +0 -57
  841. package/cpp/llama.cpp/models/templates/google-gemma-2-2b-it.jinja +0 -4
  842. package/cpp/llama.cpp/models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja +0 -59
  843. package/cpp/llama.cpp/models/templates/llama-cpp-deepseek-r1.jinja +0 -76
  844. package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +0 -34
  845. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.1.jinja +0 -58
  846. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.2.jinja +0 -287
  847. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +0 -109
  848. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +0 -93
  849. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +0 -109
  850. package/cpp/llama.cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja +0 -8
  851. package/cpp/llama.cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +0 -87
  852. package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +0 -43
  853. package/cpp/llama.cpp/models/templates/openai-gpt-oss-120b.jinja +0 -331
  854. package/cpp/llama.cpp/models/templates/unsloth-mistral-Devstral-Small-2507.jinja +0 -105
  855. package/cpp/llama.cpp/prompts/LLM-questions.txt +0 -49
  856. package/cpp/llama.cpp/prompts/alpaca.txt +0 -1
  857. package/cpp/llama.cpp/prompts/assistant.txt +0 -31
  858. package/cpp/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  859. package/cpp/llama.cpp/prompts/chat-with-bob.txt +0 -7
  860. package/cpp/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  861. package/cpp/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  862. package/cpp/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  863. package/cpp/llama.cpp/prompts/chat.txt +0 -28
  864. package/cpp/llama.cpp/prompts/dan-modified.txt +0 -1
  865. package/cpp/llama.cpp/prompts/dan.txt +0 -1
  866. package/cpp/llama.cpp/prompts/mnemonics.txt +0 -93
  867. package/cpp/llama.cpp/prompts/parallel-questions.txt +0 -43
  868. package/cpp/llama.cpp/prompts/reason-act.txt +0 -18
  869. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  870. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  871. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5524
  872. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4247
  873. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-alloc.h +0 -76
  874. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +0 -354
  875. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-blas.h +0 -25
  876. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +0 -145
  877. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-metal.h +0 -66
  878. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +0 -256
  879. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +0 -2492
  880. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/gguf.h +0 -202
  881. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +0 -1391
  882. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Modules/module.modulemap +0 -17
  883. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Resources/Info.plist +0 -32
  884. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-alloc.h +0 -76
  885. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +0 -354
  886. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-blas.h +0 -25
  887. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +0 -145
  888. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-metal.h +0 -66
  889. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +0 -256
  890. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +0 -2492
  891. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/gguf.h +0 -202
  892. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +0 -1391
  893. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Modules/module.modulemap +0 -17
  894. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Resources/Info.plist +0 -32
  895. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  896. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-alloc.h +0 -76
  897. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +0 -354
  898. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-blas.h +0 -25
  899. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +0 -145
  900. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-metal.h +0 -66
  901. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +0 -256
  902. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +0 -2492
  903. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/gguf.h +0 -202
  904. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +0 -1391
  905. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Modules/module.modulemap +0 -17
  906. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Resources/Info.plist +0 -32
  907. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  908. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  909. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  910. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  911. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5561
  912. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-alloc.h +0 -76
  913. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +0 -354
  914. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-blas.h +0 -25
  915. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +0 -145
  916. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-metal.h +0 -66
  917. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +0 -256
  918. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +0 -2492
  919. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/gguf.h +0 -202
  920. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +0 -1391
  921. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Info.plist +0 -35
  922. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Modules/module.modulemap +0 -17
  923. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  924. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  925. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  926. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5524
  927. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4246
  928. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +0 -76
  929. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +0 -354
  930. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +0 -25
  931. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +0 -145
  932. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +0 -66
  933. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +0 -256
  934. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +0 -2492
  935. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +0 -202
  936. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -1391
  937. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Info.plist +0 -35
  938. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +0 -17
  939. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  940. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  941. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  942. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5558
  943. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-alloc.h +0 -76
  944. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +0 -354
  945. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-blas.h +0 -25
  946. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +0 -145
  947. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-metal.h +0 -66
  948. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +0 -256
  949. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +0 -2492
  950. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/gguf.h +0 -202
  951. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +0 -1391
  952. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Info.plist +0 -32
  953. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Modules/module.modulemap +0 -17
  954. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  955. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  956. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  957. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5520
  958. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4243
  959. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +0 -76
  960. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +0 -354
  961. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +0 -25
  962. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +0 -145
  963. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +0 -66
  964. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +0 -256
  965. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +0 -2492
  966. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +0 -202
  967. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -1391
  968. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Info.plist +0 -32
  969. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +0 -17
  970. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  971. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
  972. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
  973. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
  974. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
  975. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +0 -0
  976. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +0 -0
  977. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
  978. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
  979. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
@@ -0,0 +1,1662 @@
1
+ #include "ggml-metal-device.h"
2
+
3
+ #include "ggml-metal-impl.h"
4
+
5
+ #include "ggml-impl.h"
6
+
7
+ #include <cassert>
8
+ #include <memory>
9
+ #include <string>
10
+ #include <unordered_map>
11
+
12
+ struct ggml_metal_device_deleter {
13
+ void operator()(ggml_metal_device_t ctx) {
14
+ ggml_metal_device_free(ctx);
15
+ }
16
+ };
17
+
18
+ typedef std::unique_ptr<ggml_metal_device, ggml_metal_device_deleter> ggml_metal_device_ptr;
19
+
20
+ ggml_metal_device_t ggml_metal_device_get(void) {
21
+ static ggml_metal_device_ptr ctx { ggml_metal_device_init() };
22
+
23
+ return ctx.get();
24
+ }
25
+
26
+ struct ggml_metal_pipelines {
27
+ std::unordered_map<std::string, ggml_metal_pipeline_t> data;
28
+ };
29
+
30
+ ggml_metal_pipelines_t ggml_metal_pipelines_init(void) {
31
+ ggml_metal_pipelines_t res = new ggml_metal_pipelines();
32
+
33
+ return res;
34
+ }
35
+
36
+ void ggml_metal_pipelines_free(ggml_metal_pipelines_t ppls) {
37
+ if (!ppls) {
38
+ return;
39
+ }
40
+
41
+ for (auto it = ppls->data.begin(); it != ppls->data.end(); ++it) {
42
+ ggml_metal_pipeline_free(it->second);
43
+ }
44
+
45
+ delete ppls;
46
+ }
47
+
48
+ void ggml_metal_pipelines_add(ggml_metal_pipelines_t ppls, const char * name, ggml_metal_pipeline_t pipeline) {
49
+ ppls->data[name] = pipeline;
50
+ }
51
+
52
+ ggml_metal_pipeline_t ggml_metal_pipelines_get(ggml_metal_pipelines_t ppls, const char * name) {
53
+ if (ppls->data.find(name) == ppls->data.end()) {
54
+ return nullptr;
55
+ }
56
+
57
+ return ppls->data[name];
58
+ }
59
+
60
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_base(ggml_metal_library_t lib, ggml_op op) {
61
+ char base[256];
62
+ char name[256];
63
+
64
+ const char * op_str = "undefined";
65
+ switch (op) {
66
+ case GGML_OP_ADD_ID: op_str = "add_id"; break;
67
+ case GGML_OP_CONCAT: op_str = "concat"; break;
68
+ default: GGML_ABORT("fatal error");
69
+ };
70
+
71
+ snprintf(base, 256, "kernel_%s", op_str);
72
+ snprintf(name, 256, "%s", base);
73
+
74
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
75
+ if (res) {
76
+ return res;
77
+ }
78
+
79
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
80
+
81
+ return res;
82
+ }
83
+
84
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_cpy(ggml_metal_library_t lib, ggml_type tsrc, ggml_type tdst) {
85
+ char base[256];
86
+ char name[256];
87
+
88
+ snprintf(base, 256, "kernel_cpy_%s_%s", ggml_type_name(tsrc), ggml_type_name(tdst));
89
+ snprintf(name, 256, "%s", base);
90
+
91
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
92
+ if (res) {
93
+ return res;
94
+ }
95
+
96
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
97
+
98
+ return res;
99
+ }
100
+
101
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_pool_2d(ggml_metal_library_t lib, const ggml_tensor * op, ggml_op_pool op_pool) {
102
+ GGML_ASSERT(ggml_is_contiguous(op->src[0]));
103
+ GGML_ASSERT(op->src[0]->type == GGML_TYPE_F32 && op->src[0]->type == op->type);
104
+
105
+ const char * pool_str = "undefined";
106
+ switch (op_pool) {
107
+ case GGML_OP_POOL_AVG: pool_str = "avg"; break;
108
+ case GGML_OP_POOL_MAX: pool_str = "max"; break;
109
+ default: GGML_ASSERT(false && "not implemented");
110
+ };
111
+
112
+ char base[256];
113
+ char name[256];
114
+
115
+ snprintf(base, 256, "kernel_pool_2d_%s_%s", pool_str, ggml_type_name(op->src[0]->type));
116
+ snprintf(name, 256, "%s", base);
117
+
118
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
119
+ if (res) {
120
+ return res;
121
+ }
122
+
123
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
124
+
125
+ return res;
126
+ }
127
+
128
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_get_rows(ggml_metal_library_t lib, ggml_type tsrc) {
129
+ char base[256];
130
+ char name[256];
131
+
132
+ snprintf(base, 256, "kernel_get_rows_%s", ggml_type_name(tsrc));
133
+ snprintf(name, 256, "%s", base);
134
+
135
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
136
+ if (res) {
137
+ return res;
138
+ }
139
+
140
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
141
+
142
+ return res;
143
+ }
144
+
145
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_set_rows(ggml_metal_library_t lib, ggml_type tidx, ggml_type tdst) {
146
+ char base[256];
147
+ char name[256];
148
+
149
+ snprintf(base, 256, "kernel_set_rows_%s_%s", ggml_type_name(tdst), ggml_type_name(tidx));
150
+ snprintf(name, 256, "%s", base);
151
+
152
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
153
+ if (res) {
154
+ return res;
155
+ }
156
+
157
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
158
+
159
+ return res;
160
+ }
161
+
162
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_repeat(ggml_metal_library_t lib, ggml_type tsrc) {
163
+ char base[256];
164
+ char name[256];
165
+
166
+ snprintf(base, 256, "kernel_repeat_%s", ggml_type_name(tsrc));
167
+ snprintf(name, 256, "%s", base);
168
+
169
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
170
+ if (res) {
171
+ return res;
172
+ }
173
+
174
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
175
+
176
+ return res;
177
+ }
178
+
179
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_unary(ggml_metal_library_t lib, const ggml_tensor * op) {
180
+ GGML_ASSERT(ggml_is_contiguous(op->src[0]));
181
+
182
+ char base[256];
183
+ char name[256];
184
+
185
+ const int64_t n = ggml_nelements(op);
186
+
187
+ const char * op_str = "undefined";
188
+ switch (op->op) {
189
+ case GGML_OP_SCALE: op_str = "scale"; break;
190
+ case GGML_OP_CLAMP: op_str = "clamp"; break;
191
+ case GGML_OP_SQR: op_str = "sqr"; break;
192
+ case GGML_OP_SQRT: op_str = "sqrt"; break;
193
+ case GGML_OP_SIN: op_str = "sin"; break;
194
+ case GGML_OP_COS: op_str = "cos"; break;
195
+ case GGML_OP_LOG: op_str = "log"; break;
196
+ case GGML_OP_LEAKY_RELU: op_str = "leaky_relu"; break;
197
+ case GGML_OP_UNARY:
198
+ switch (ggml_get_unary_op(op)) {
199
+ case GGML_UNARY_OP_TANH: op_str = "tanh"; break;
200
+ case GGML_UNARY_OP_RELU: op_str = "relu"; break;
201
+ case GGML_UNARY_OP_SIGMOID: op_str = "sigmoid"; break;
202
+ case GGML_UNARY_OP_GELU: op_str = "gelu"; break;
203
+ case GGML_UNARY_OP_GELU_ERF: op_str = "gelu_erf"; break;
204
+ case GGML_UNARY_OP_GELU_QUICK: op_str = "gelu_quick"; break;
205
+ case GGML_UNARY_OP_SILU: op_str = "silu"; break;
206
+ case GGML_UNARY_OP_ELU: op_str = "elu"; break;
207
+ case GGML_UNARY_OP_NEG: op_str = "neg"; break;
208
+ case GGML_UNARY_OP_ABS: op_str = "abs"; break;
209
+ case GGML_UNARY_OP_SGN: op_str = "sgn"; break;
210
+ case GGML_UNARY_OP_STEP: op_str = "step"; break;
211
+ case GGML_UNARY_OP_HARDSWISH: op_str = "hardswish"; break;
212
+ case GGML_UNARY_OP_HARDSIGMOID: op_str = "hardsigmoid"; break;
213
+ case GGML_UNARY_OP_EXP: op_str = "exp"; break;
214
+ default: GGML_ABORT("fatal error");
215
+ } break;
216
+ default: GGML_ABORT("fatal error");
217
+ };
218
+
219
+ const char * suffix = "";
220
+ if (n % 4 == 0) {
221
+ suffix = "_4";
222
+ }
223
+
224
+ snprintf(base, 256, "kernel_%s_%s%s", op_str, ggml_type_name(op->src[0]->type), suffix);
225
+ snprintf(name, 256, "%s", base);
226
+
227
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
228
+ if (res) {
229
+ return res;
230
+ }
231
+
232
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
233
+
234
+ return res;
235
+ }
236
+
237
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_glu(ggml_metal_library_t lib, const ggml_tensor * op) {
238
+ GGML_ASSERT(ggml_is_contiguous_1(op->src[0]));
239
+
240
+ char base[256];
241
+ char name[256];
242
+
243
+ const char * op_str = "undefined";
244
+ switch (op->op) {
245
+ case GGML_OP_GLU:
246
+ switch (ggml_get_glu_op(op)) {
247
+ case GGML_GLU_OP_REGLU: op_str = "reglu"; break;
248
+ case GGML_GLU_OP_GEGLU: op_str = "geglu"; break;
249
+ case GGML_GLU_OP_SWIGLU: op_str = "swiglu"; break;
250
+ case GGML_GLU_OP_SWIGLU_OAI: op_str = "swiglu_oai"; break;
251
+ case GGML_GLU_OP_GEGLU_ERF: op_str = "geglu_erf"; break;
252
+ case GGML_GLU_OP_GEGLU_QUICK: op_str = "geglu_quick"; break;
253
+ default: GGML_ABORT("fatal error");
254
+ } break;
255
+ default: GGML_ABORT("fatal error");
256
+ };
257
+
258
+ snprintf(base, 256, "kernel_%s_%s", op_str, ggml_type_name(op->src[0]->type));
259
+ snprintf(name, 256, "%s", base);
260
+
261
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
262
+ if (res) {
263
+ return res;
264
+ }
265
+
266
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
267
+
268
+ return res;
269
+ }
270
+
271
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_sum(ggml_metal_library_t lib, const ggml_tensor * op) {
272
+ assert(op->op == GGML_OP_SUM);
273
+
274
+ char base[256];
275
+ char name[256];
276
+
277
+ snprintf(base, 256, "kernel_op_sum_%s", ggml_type_name(op->src[0]->type));
278
+ snprintf(name, 256, "%s", base);
279
+
280
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
281
+ if (res) {
282
+ return res;
283
+ }
284
+
285
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
286
+
287
+ return res;
288
+ }
289
+
290
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_sum_rows(ggml_metal_library_t lib, const ggml_tensor * op) {
291
+ GGML_ASSERT(op->src[0]->nb[0] == ggml_type_size(op->src[0]->type));
292
+
293
+ char base[256];
294
+ char name[256];
295
+
296
+ const char * op_str = "undefined";
297
+ switch (op->op) {
298
+ case GGML_OP_SUM_ROWS:
299
+ op_str = "sum_rows"; break;
300
+ case GGML_OP_MEAN:
301
+ op_str = "mean"; break;
302
+ default: GGML_ABORT("fatal error");
303
+ };
304
+
305
+ snprintf(base, 256, "kernel_%s_%s", op_str, ggml_type_name(op->src[0]->type));
306
+
307
+ snprintf(name, 256, "%s", base);
308
+
309
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
310
+ if (res) {
311
+ return res;
312
+ }
313
+
314
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
315
+
316
+ ggml_metal_pipeline_set_smem(res, 32*sizeof(float));
317
+
318
+ return res;
319
+ }
320
+
321
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_cumsum_blk(ggml_metal_library_t lib, const ggml_tensor * op) {
322
+ GGML_ASSERT(op->op == GGML_OP_CUMSUM);
323
+
324
+ char base[256];
325
+ char name[256];
326
+
327
+ snprintf(base, 256, "kernel_cumsum_blk_%s", ggml_type_name(op->src[0]->type));
328
+ snprintf(name, 256, "%s", base);
329
+
330
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
331
+ if (res) {
332
+ return res;
333
+ }
334
+
335
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
336
+
337
+ return res;
338
+ }
339
+
340
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_cumsum_add(ggml_metal_library_t lib, const ggml_tensor * op) {
341
+ GGML_ASSERT(op->op == GGML_OP_CUMSUM);
342
+
343
+ char base[256];
344
+ char name[256];
345
+
346
+ snprintf(base, 256, "kernel_cumsum_add_%s", ggml_type_name(op->src[0]->type));
347
+ snprintf(name, 256, "%s", base);
348
+
349
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
350
+ if (res) {
351
+ return res;
352
+ }
353
+
354
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
355
+
356
+ return res;
357
+ }
358
+
359
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_soft_max(ggml_metal_library_t lib, const ggml_tensor * op) {
360
+ GGML_ASSERT(!op->src[1] || op->src[1]->type == GGML_TYPE_F16 || op->src[1]->type == GGML_TYPE_F32);
361
+
362
+ char base[256];
363
+ char name[256];
364
+
365
+ const char * suffix = "";
366
+
367
+ if (op->src[0]->ne[0] % 4 == 0) {
368
+ suffix = "_4";
369
+ }
370
+
371
+ const ggml_type tsrc1 = op->src[1] ? op->src[1]->type : GGML_TYPE_F32;
372
+
373
+ snprintf(base, 256, "kernel_soft_max_%s%s", ggml_type_name(tsrc1), suffix);
374
+ snprintf(name, 256, "%s", base);
375
+
376
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
377
+ if (res) {
378
+ return res;
379
+ }
380
+
381
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
382
+
383
+ ggml_metal_pipeline_set_smem(res, 32*sizeof(float));
384
+
385
+ return res;
386
+ }
387
+
388
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_ssm_conv(ggml_metal_library_t lib, const ggml_tensor * op) {
389
+ GGML_ASSERT(op->src[0]->type == GGML_TYPE_F32);
390
+ GGML_ASSERT(op->src[1]->type == GGML_TYPE_F32);
391
+
392
+ GGML_ASSERT(ggml_is_contiguous(op->src[0]));
393
+ GGML_ASSERT(ggml_is_contiguous(op->src[1]));
394
+
395
+ char base[256];
396
+ char name[256];
397
+
398
+ const char * suffix = "";
399
+
400
+ if (op->src[1]->ne[0] % 4 == 0) {
401
+ suffix = "_4";
402
+ }
403
+
404
+ snprintf(base, 256, "kernel_ssm_conv_%s_%s%s", ggml_type_name(op->src[0]->type), ggml_type_name(op->src[1]->type), suffix);
405
+ snprintf(name, 256, "%s", base);
406
+
407
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
408
+ if (res) {
409
+ return res;
410
+ }
411
+
412
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
413
+
414
+ return res;
415
+ }
416
+
417
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_ssm_scan(ggml_metal_library_t lib, const ggml_tensor * op) {
418
+ GGML_TENSOR_LOCALS( int32_t, ne0, op->src[0], ne);
419
+
420
+ char base[256];
421
+ char name[256];
422
+
423
+ const int nsg = (ne00 + 31)/32;
424
+
425
+ snprintf(base, 256, "kernel_ssm_scan_%s", ggml_type_name(op->src[0]->type));
426
+ snprintf(name, 256, "%s_nsg=%d", base, nsg);
427
+
428
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
429
+ if (res) {
430
+ return res;
431
+ }
432
+
433
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
434
+
435
+ ggml_metal_pipeline_set_smem(res, 32*sizeof(float)*nsg);
436
+
437
+ return res;
438
+ }
439
+
440
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_rwkv(ggml_metal_library_t lib, const ggml_tensor * op) {
441
+ char base[256];
442
+ char name[256];
443
+
444
+ const int64_t C = op->ne[0];
445
+ const int64_t H = op->src[0]->ne[1];
446
+
447
+ switch (op->op) {
448
+ case GGML_OP_RWKV_WKV6:
449
+ {
450
+ GGML_ASSERT(op->src[5]->type == GGML_TYPE_F32);
451
+ GGML_ASSERT(C % H == 0);
452
+ GGML_ASSERT(C / H == 64);
453
+
454
+ snprintf(base, 256, "kernel_rwkv_wkv6_%s", ggml_type_name(op->src[0]->type));
455
+ } break;
456
+ case GGML_OP_RWKV_WKV7:
457
+ {
458
+ GGML_ASSERT(op->src[6]->type == GGML_TYPE_F32);
459
+ GGML_ASSERT(C % H == 0);
460
+ GGML_ASSERT(C / H == 64);
461
+
462
+ snprintf(base, 256, "kernel_rwkv_wkv7_%s", ggml_type_name(op->src[0]->type));
463
+ } break;
464
+ default:
465
+ GGML_ABORT("fatal error");
466
+ }
467
+
468
+ snprintf(name, 256, "%s", base);
469
+
470
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
471
+ if (res) {
472
+ return res;
473
+ }
474
+
475
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
476
+
477
+ return res;
478
+ }
479
+
480
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mv_ext(ggml_metal_library_t lib, ggml_type tsrc0, ggml_type tsrc1, int nsg, int nxpsg, int r1ptg) {
481
+ char base[256];
482
+ char name[256];
483
+
484
+ snprintf(base, 256, "kernel_mul_mv_ext_%s_%s_r1_%d", ggml_type_name(tsrc0), ggml_type_name(tsrc1), r1ptg);
485
+ snprintf(name, 256, "%s_nsg=%d_nxpsg=%d", base, nsg, nxpsg);
486
+
487
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
488
+ if (res) {
489
+ return res;
490
+ }
491
+
492
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
493
+
494
+ ggml_metal_cv_set_int16(cv, nsg, FC_MUL_MV + 0);
495
+ ggml_metal_cv_set_int16(cv, nxpsg, FC_MUL_MV + 1);
496
+
497
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
498
+
499
+ ggml_metal_cv_free(cv);
500
+
501
+ return res;
502
+ }
503
+
504
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm(ggml_metal_library_t lib, const ggml_tensor * op) {
505
+ char base[256];
506
+ char name[256];
507
+
508
+ const ggml_type tsrc0 = op->src[0]->type;
509
+ const ggml_type tsrc1 = op->src[1]->type;
510
+
511
+ const bool bc_inp = op->src[0]->ne[0] % 32 != 0;
512
+ const bool bc_out = op->ne[0] % 64 != 0 || op->ne[1] % 32 != 0;
513
+
514
+ snprintf(base, 256, "kernel_mul_mm_%s_%s", ggml_type_name(tsrc0), ggml_type_name(tsrc1));
515
+ snprintf(name, 256, "%s_bci=%d_bco=%d", base, bc_inp, bc_out);
516
+
517
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
518
+ if (res) {
519
+ return res;
520
+ }
521
+
522
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
523
+
524
+ ggml_metal_cv_set_bool(cv, bc_inp, FC_MUL_MM + 0);
525
+ ggml_metal_cv_set_bool(cv, bc_out, FC_MUL_MM + 1);
526
+
527
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
528
+
529
+ ggml_metal_cv_free(cv);
530
+
531
+ // when the output size is not multiple of 64x32, we need extra smem to prevent out-of-bounds writes
532
+ ggml_metal_pipeline_set_smem(res, bc_out ? 8192 : 4096 + 2048);
533
+
534
+ return res;
535
+ }
536
+
537
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mv(ggml_metal_library_t lib, const ggml_tensor * op) {
538
+ GGML_TENSOR_LOCALS( int32_t, ne0, op->src[0], ne);
539
+ GGML_TENSOR_LOCALS( int32_t, ne1, op->src[1], ne);
540
+
541
+ char base[256];
542
+ char name[256];
543
+
544
+ int nsg = 0; // number of simdgroups
545
+ int nr0 = 0; // number of src0 rows per simdgroup
546
+ int nr1 = 1; // number of src1 rows per threadgroup
547
+
548
+ size_t smem = 0; // shared memory
549
+
550
+ const ggml_type tsrc0 = op->src[0]->type;
551
+ const ggml_type tsrc1 = op->src[1]->type;
552
+
553
+ const char * suffix = "";
554
+
555
+ // use custom matrix x vector kernel
556
+ switch (tsrc0) {
557
+ case GGML_TYPE_F32:
558
+ case GGML_TYPE_F16:
559
+ case GGML_TYPE_BF16:
560
+ {
561
+ if (ne00 < 32) {
562
+ nsg = 1;
563
+ nr0 = 32;
564
+ nr1 = 1;
565
+ suffix = "_short";
566
+ } else {
567
+ nsg = std::min(4, (ne00 + 127) / 128);
568
+ nr0 = 2;
569
+ nr1 = 1;
570
+ smem = 32*sizeof(float)*nr0;
571
+ suffix = ne00 % 4 == 0 ? "_4" : "";
572
+ }
573
+ } break;
574
+ case GGML_TYPE_Q4_0:
575
+ {
576
+ nsg = N_SG_Q4_0;
577
+ nr0 = N_R0_Q4_0;
578
+ } break;
579
+ case GGML_TYPE_Q4_1:
580
+ {
581
+ nsg = N_SG_Q4_1;
582
+ nr0 = N_R0_Q4_1;
583
+ } break;
584
+ case GGML_TYPE_Q5_0:
585
+ {
586
+ nsg = N_SG_Q5_0;
587
+ nr0 = N_R0_Q5_0;
588
+ } break;
589
+ case GGML_TYPE_Q5_1:
590
+ {
591
+ nsg = N_SG_Q5_1;
592
+ nr0 = N_R0_Q5_1;
593
+ } break;
594
+ case GGML_TYPE_Q8_0:
595
+ {
596
+ nsg = N_SG_Q8_0;
597
+ nr0 = N_R0_Q8_0;
598
+ smem = 32*sizeof(float)*N_R0_Q8_0;
599
+ } break;
600
+ case GGML_TYPE_MXFP4:
601
+ {
602
+ nsg = N_SG_MXFP4;
603
+ nr0 = N_R0_MXFP4;
604
+ smem = 32*sizeof(float);
605
+ } break;
606
+ case GGML_TYPE_Q2_K:
607
+ {
608
+ nsg = N_SG_Q2_K;
609
+ nr0 = N_R0_Q2_K;
610
+ } break;
611
+ case GGML_TYPE_Q3_K:
612
+ {
613
+ nsg = N_SG_Q3_K;
614
+ nr0 = N_R0_Q3_K;
615
+ } break;
616
+ case GGML_TYPE_Q4_K:
617
+ {
618
+ nsg = N_SG_Q4_K;
619
+ nr0 = N_R0_Q4_K;
620
+ } break;
621
+ case GGML_TYPE_Q5_K:
622
+ {
623
+ nsg = N_SG_Q5_K;
624
+ nr0 = N_R0_Q5_K;
625
+ } break;
626
+ case GGML_TYPE_Q6_K:
627
+ {
628
+ nsg = N_SG_Q6_K;
629
+ nr0 = N_R0_Q6_K;
630
+ } break;
631
+ case GGML_TYPE_IQ2_XXS:
632
+ {
633
+ nsg = N_SG_IQ2_XXS;
634
+ nr0 = N_R0_IQ2_XXS;
635
+ smem = 256*8+128;
636
+ } break;
637
+ case GGML_TYPE_IQ2_XS:
638
+ {
639
+ nsg = N_SG_IQ2_XS;
640
+ nr0 = N_R0_IQ2_XS;
641
+ smem = 512*8+128;
642
+ } break;
643
+ case GGML_TYPE_IQ3_XXS:
644
+ {
645
+ nsg = N_SG_IQ3_XXS;
646
+ nr0 = N_R0_IQ3_XXS;
647
+ smem = 256*4+128;
648
+ } break;
649
+ case GGML_TYPE_IQ3_S:
650
+ {
651
+ nsg = N_SG_IQ3_S;
652
+ nr0 = N_R0_IQ3_S;
653
+ smem = 512*4;
654
+ } break;
655
+ case GGML_TYPE_IQ2_S:
656
+ {
657
+ nsg = N_SG_IQ2_S;
658
+ nr0 = N_R0_IQ2_S;
659
+ } break;
660
+ case GGML_TYPE_IQ1_S:
661
+ {
662
+ nsg = N_SG_IQ1_S;
663
+ nr0 = N_R0_IQ1_S;
664
+ } break;
665
+ case GGML_TYPE_IQ1_M:
666
+ {
667
+ nsg = N_SG_IQ1_M;
668
+ nr0 = N_R0_IQ1_M;
669
+ } break;
670
+ case GGML_TYPE_IQ4_NL:
671
+ {
672
+ nsg = N_SG_IQ4_NL;
673
+ nr0 = N_R0_IQ4_NL;
674
+ smem = 32*sizeof(float);
675
+ } break;
676
+ case GGML_TYPE_IQ4_XS:
677
+ {
678
+ nsg = N_SG_IQ4_XS;
679
+ nr0 = N_R0_IQ4_XS;
680
+ smem = 32*sizeof(float);
681
+ } break;
682
+ default:
683
+ {
684
+ GGML_LOG_ERROR("Asserting on type %d\n", (int) tsrc0);
685
+ GGML_ABORT("not implemented");
686
+ }
687
+ };
688
+
689
+ snprintf(base, 256, "kernel_mul_mv_%s_%s%s", ggml_type_name(tsrc0), ggml_type_name(tsrc1), suffix);
690
+ snprintf(name, 256, "%s_nsg=%d", base, nsg);
691
+
692
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
693
+ if (res) {
694
+ return res;
695
+ }
696
+
697
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
698
+
699
+ ggml_metal_cv_set_int16(cv, nsg, FC_MUL_MV + 0);
700
+
701
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
702
+
703
+ ggml_metal_cv_free(cv);
704
+
705
+ ggml_metal_pipeline_set_nr0 (res, nr0);
706
+ ggml_metal_pipeline_set_nr1 (res, nr1);
707
+ ggml_metal_pipeline_set_nsg (res, nsg);
708
+ ggml_metal_pipeline_set_smem(res, smem);
709
+
710
+ return res;
711
+ }
712
+
713
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm_id_map0(ggml_metal_library_t lib, int ne02, int ne20) {
714
+ char base[256];
715
+ char name[256];
716
+
717
+ snprintf(base, 256, "kernel_mul_mm_id_map0_ne20_%d", ne20);
718
+ snprintf(name, 256, "%s_ne02=%d", base, ne02);
719
+
720
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
721
+ if (res) {
722
+ return res;
723
+ }
724
+
725
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
726
+
727
+ const size_t smem = (size_t) ne02*ne20*sizeof(uint16_t);
728
+
729
+ ggml_metal_pipeline_set_smem(res, smem);
730
+
731
+ return res;
732
+ }
733
+
734
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm_id(ggml_metal_library_t lib, const ggml_tensor * op) {
735
+ char base[256];
736
+ char name[256];
737
+
738
+ const ggml_type tsrc0 = op->src[0]->type;
739
+ const ggml_type tsrc1 = op->src[1]->type;
740
+
741
+ const bool bc_inp = op->src[0]->ne[0] % 32 != 0;
742
+
743
+ snprintf(base, 256, "kernel_mul_mm_id_%s_%s", ggml_type_name(tsrc0), ggml_type_name(tsrc1));
744
+ snprintf(name, 256, "%s_bci=%d", base, bc_inp);
745
+
746
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
747
+ if (res) {
748
+ return res;
749
+ }
750
+
751
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
752
+
753
+ ggml_metal_cv_set_bool(cv, bc_inp, FC_MUL_MM + 0);
754
+
755
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
756
+
757
+ ggml_metal_cv_free(cv);
758
+
759
+ ggml_metal_pipeline_set_smem(res, 8192);
760
+
761
+ return res;
762
+ }
763
+
764
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mv_id(ggml_metal_library_t lib, const ggml_tensor * op) {
765
+ GGML_TENSOR_LOCALS( int32_t, ne0, op->src[0], ne);
766
+ GGML_TENSOR_LOCALS( int32_t, ne1, op->src[1], ne);
767
+
768
+ char base[256];
769
+ char name[256];
770
+
771
+ int nsg = 0; // number of simdgroups
772
+ int nr0 = 0; // number of src0 rows per simdgroup
773
+ int nr1 = 1; // number of src1 rows per threadgroup
774
+
775
+ size_t smem = 0; // shared memory
776
+
777
+ const ggml_type tsrc0 = op->src[0]->type;
778
+ const ggml_type tsrc1 = op->src[1]->type;
779
+
780
+ const char * suffix = "";
781
+
782
+ // use custom matrix x vector kernel
783
+ switch (tsrc0) {
784
+ case GGML_TYPE_F32:
785
+ case GGML_TYPE_F16:
786
+ case GGML_TYPE_BF16:
787
+ {
788
+ nsg = std::min(4, (ne00 + 127) / 128);
789
+ nr0 = 2;
790
+ nr1 = 1;
791
+ smem = 32*sizeof(float)*nr0;
792
+ suffix = ne00 % 4 == 0 ? "_4" : "";
793
+ } break;
794
+ case GGML_TYPE_Q4_0:
795
+ {
796
+ nsg = N_SG_Q4_0;
797
+ nr0 = N_R0_Q4_0;
798
+ } break;
799
+ case GGML_TYPE_Q4_1:
800
+ {
801
+ nsg = N_SG_Q4_1;
802
+ nr0 = N_R0_Q4_1;
803
+ } break;
804
+ case GGML_TYPE_Q5_0:
805
+ {
806
+ nsg = N_SG_Q5_0;
807
+ nr0 = N_R0_Q5_0;
808
+ } break;
809
+ case GGML_TYPE_Q5_1:
810
+ {
811
+ nsg = N_SG_Q5_1;
812
+ nr0 = N_R0_Q5_1;
813
+ } break;
814
+ case GGML_TYPE_Q8_0:
815
+ {
816
+ nsg = N_SG_Q8_0;
817
+ nr0 = N_R0_Q8_0;
818
+ smem = 32*sizeof(float)*N_R0_Q8_0;
819
+ } break;
820
+ case GGML_TYPE_MXFP4:
821
+ {
822
+ nsg = N_SG_MXFP4;
823
+ nr0 = N_R0_MXFP4;
824
+ smem = 32*sizeof(float);
825
+ } break;
826
+ case GGML_TYPE_Q2_K:
827
+ {
828
+ nsg = N_SG_Q2_K;
829
+ nr0 = N_R0_Q2_K;
830
+ } break;
831
+ case GGML_TYPE_Q3_K:
832
+ {
833
+ nsg = N_SG_Q3_K;
834
+ nr0 = N_R0_Q3_K;
835
+ } break;
836
+ case GGML_TYPE_Q4_K:
837
+ {
838
+ nsg = N_SG_Q4_K;
839
+ nr0 = N_R0_Q4_K;
840
+ } break;
841
+ case GGML_TYPE_Q5_K:
842
+ {
843
+ nsg = N_SG_Q5_K;
844
+ nr0 = N_R0_Q5_K;
845
+ } break;
846
+ case GGML_TYPE_Q6_K:
847
+ {
848
+ nsg = N_SG_Q6_K;
849
+ nr0 = N_R0_Q6_K;
850
+ } break;
851
+ case GGML_TYPE_IQ2_XXS:
852
+ {
853
+ nsg = N_SG_IQ2_XXS;
854
+ nr0 = N_R0_IQ2_XXS;
855
+ smem = 256*8+128;
856
+ } break;
857
+ case GGML_TYPE_IQ2_XS:
858
+ {
859
+ nsg = N_SG_IQ2_XS;
860
+ nr0 = N_R0_IQ2_XS;
861
+ smem = 512*8+128;
862
+ } break;
863
+ case GGML_TYPE_IQ3_XXS:
864
+ {
865
+ nsg = N_SG_IQ3_XXS;
866
+ nr0 = N_R0_IQ3_XXS;
867
+ smem = 256*4+128;
868
+ } break;
869
+ case GGML_TYPE_IQ3_S:
870
+ {
871
+ nsg = N_SG_IQ3_S;
872
+ nr0 = N_R0_IQ3_S;
873
+ smem = 512*4;
874
+ } break;
875
+ case GGML_TYPE_IQ2_S:
876
+ {
877
+ nsg = N_SG_IQ2_S;
878
+ nr0 = N_R0_IQ2_S;
879
+ } break;
880
+ case GGML_TYPE_IQ1_S:
881
+ {
882
+ nsg = N_SG_IQ1_S;
883
+ nr0 = N_R0_IQ1_S;
884
+ } break;
885
+ case GGML_TYPE_IQ1_M:
886
+ {
887
+ nsg = N_SG_IQ1_M;
888
+ nr0 = N_R0_IQ1_M;
889
+ } break;
890
+ case GGML_TYPE_IQ4_NL:
891
+ {
892
+ nsg = N_SG_IQ4_NL;
893
+ nr0 = N_R0_IQ4_NL;
894
+ smem = 32*sizeof(float);
895
+ } break;
896
+ case GGML_TYPE_IQ4_XS:
897
+ {
898
+ nsg = N_SG_IQ4_XS;
899
+ nr0 = N_R0_IQ4_XS;
900
+ smem = 32*sizeof(float);
901
+ } break;
902
+ default:
903
+ {
904
+ GGML_LOG_ERROR("Asserting on type %d\n", (int)op->src[2]->type);
905
+ GGML_ABORT("not implemented");
906
+ }
907
+ };
908
+
909
+ snprintf(base, 256, "kernel_mul_mv_id_%s_%s%s", ggml_type_name(tsrc0), ggml_type_name(tsrc1), suffix);
910
+ snprintf(name, 256, "%s_nsg=%d", base, nsg);
911
+
912
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
913
+ if (res) {
914
+ return res;
915
+ }
916
+
917
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
918
+
919
+ ggml_metal_cv_set_int16(cv, nsg, FC_MUL_MV + 0);
920
+
921
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
922
+
923
+ ggml_metal_cv_free(cv);
924
+
925
+ ggml_metal_pipeline_set_nr0 (res, nr0);
926
+ ggml_metal_pipeline_set_nr1 (res, nr1);
927
+ ggml_metal_pipeline_set_nsg (res, nsg);
928
+ ggml_metal_pipeline_set_smem(res, smem);
929
+
930
+ return res;
931
+ }
932
+
933
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_argmax(ggml_metal_library_t lib, const ggml_tensor * op) {
934
+ GGML_ASSERT(op->src[0]->type == GGML_TYPE_F32);
935
+ GGML_ASSERT(ggml_is_contiguous_1(op->src[0]));
936
+ GGML_ASSERT(op->src[0]->nb[0] == ggml_type_size(op->src[0]->type));
937
+
938
+ char base[256];
939
+ char name[256];
940
+
941
+ snprintf(base, 256, "kernel_argmax_%s", ggml_type_name(op->src[0]->type));
942
+ snprintf(name, 256, "%s", base);
943
+
944
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
945
+ if (res) {
946
+ return res;
947
+ }
948
+
949
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
950
+
951
+ ggml_metal_pipeline_set_smem(res, 32*(sizeof(float) + sizeof(int32_t)));
952
+
953
+ return res;
954
+ }
955
+
956
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_argsort(ggml_metal_library_t lib, const ggml_tensor * op) {
957
+ assert(op->op == GGML_OP_ARGSORT);
958
+
959
+ char base[256];
960
+ char name[256];
961
+
962
+ ggml_sort_order order = (ggml_sort_order) op->op_params[0];
963
+
964
+ const char * order_str = "undefined";
965
+ switch (order) {
966
+ case GGML_SORT_ORDER_ASC: order_str = "asc"; break;
967
+ case GGML_SORT_ORDER_DESC: order_str = "desc"; break;
968
+ default: GGML_ABORT("fatal error");
969
+ };
970
+
971
+ snprintf(base, 256, "kernel_argsort_%s_%s_%s", ggml_type_name(op->src[0]->type), ggml_type_name(op->type), order_str);
972
+ snprintf(name, 256, "%s", base);
973
+
974
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
975
+ if (res) {
976
+ return res;
977
+ }
978
+
979
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
980
+
981
+ return res;
982
+ }
983
+
984
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_argsort_merge(ggml_metal_library_t lib, const ggml_tensor * op) {
985
+ assert(op->op == GGML_OP_ARGSORT);
986
+
987
+ char base[256];
988
+ char name[256];
989
+
990
+ ggml_sort_order order = (ggml_sort_order) op->op_params[0];
991
+
992
+ const char * order_str = "undefined";
993
+ switch (order) {
994
+ case GGML_SORT_ORDER_ASC: order_str = "asc"; break;
995
+ case GGML_SORT_ORDER_DESC: order_str = "desc"; break;
996
+ default: GGML_ABORT("fatal error");
997
+ };
998
+
999
+ snprintf(base, 256, "kernel_argsort_merge_%s_%s_%s", ggml_type_name(op->src[0]->type), ggml_type_name(op->type), order_str);
1000
+ snprintf(name, 256, "%s", base);
1001
+
1002
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1003
+ if (res) {
1004
+ return res;
1005
+ }
1006
+
1007
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1008
+
1009
+ return res;
1010
+ }
1011
+
1012
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_flash_attn_ext_pad(
1013
+ ggml_metal_library_t lib,
1014
+ const struct ggml_tensor * op,
1015
+ bool has_mask,
1016
+ int32_t ncpsg) {
1017
+ assert(op->op == GGML_OP_FLASH_ATTN_EXT);
1018
+ GGML_UNUSED(op);
1019
+
1020
+ char base[256];
1021
+ char name[256];
1022
+
1023
+ snprintf(base, 256, "kernel_%s",
1024
+ "flash_attn_ext_pad");
1025
+
1026
+ snprintf(name, 256, "%s_mask=%d_ncpsg=%d",
1027
+ base,
1028
+ has_mask,
1029
+ ncpsg);
1030
+
1031
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1032
+ if (res) {
1033
+ return res;
1034
+ }
1035
+
1036
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
1037
+
1038
+ ggml_metal_cv_set_bool(cv, has_mask, FC_FLASH_ATTN_EXT_PAD + 0);
1039
+ //ggml_metal_cv_set_bool(cv, has_sinks, FC_FLASH_ATTN_EXT_PAD + 1);
1040
+ //ggml_metal_cv_set_bool(cv, has_bias, FC_FLASH_ATTN_EXT_PAD + 2);
1041
+ //ggml_metal_cv_set_bool(cv, has_scap, FC_FLASH_ATTN_EXT_PAD + 3);
1042
+
1043
+ //ggml_metal_cv_set_int32(cv, ns10, FC_FLASH_ATTN_EXT_PAD + 20);
1044
+ //ggml_metal_cv_set_int32(cv, ns20, FC_FLASH_ATTN_EXT_PAD + 21);
1045
+ //ggml_metal_cv_set_int32(cv, nsg, FC_FLASH_ATTN_EXT_PAD + 22);
1046
+ //ggml_metal_cv_set_int32(cv, nwg, FC_FLASH_ATTN_EXT_PAD + 23);
1047
+ //ggml_metal_cv_set_int32(cv, nqptg, FC_FLASH_ATTN_EXT_PAD + 24);
1048
+ ggml_metal_cv_set_int32(cv, ncpsg, FC_FLASH_ATTN_EXT_PAD + 25);
1049
+
1050
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
1051
+
1052
+ ggml_metal_cv_free(cv);
1053
+
1054
+ return res;
1055
+ }
1056
+
1057
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_flash_attn_ext_blk(
1058
+ ggml_metal_library_t lib,
1059
+ const struct ggml_tensor * op,
1060
+ int32_t nqptg,
1061
+ int32_t ncpsg) {
1062
+ assert(op->op == GGML_OP_FLASH_ATTN_EXT);
1063
+ GGML_UNUSED(op);
1064
+
1065
+ char base[256];
1066
+ char name[256];
1067
+
1068
+ snprintf(base, 256, "kernel_%s",
1069
+ "flash_attn_ext_blk");
1070
+
1071
+ snprintf(name, 256, "%s_nqptg=%d_ncpsg=%d",
1072
+ base,
1073
+ nqptg,
1074
+ ncpsg);
1075
+
1076
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1077
+ if (res) {
1078
+ return res;
1079
+ }
1080
+
1081
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
1082
+
1083
+ //ggml_metal_cv_set_bool(cv, has_mask, FC_FLASH_ATTN_EXT_BLK + 0);
1084
+ //ggml_metal_cv_set_bool(cv, has_sinks, FC_FLASH_ATTN_EXT_BLK + 1);
1085
+ //ggml_metal_cv_set_bool(cv, has_bias, FC_FLASH_ATTN_EXT_BLK + 2);
1086
+ //ggml_metal_cv_set_bool(cv, has_scap, FC_FLASH_ATTN_EXT_BLK + 3);
1087
+
1088
+ //ggml_metal_cv_set_int32(cv, ns10, FC_FLASH_ATTN_EXT_BLK + 20);
1089
+ //ggml_metal_cv_set_int32(cv, ns20, FC_FLASH_ATTN_EXT_BLK + 21);
1090
+ //ggml_metal_cv_set_int32(cv, nsg, FC_FLASH_ATTN_EXT_BLK + 22);
1091
+ //ggml_metal_cv_set_int32(cv, nwg, FC_FLASH_ATTN_EXT_BLK + 23);
1092
+ ggml_metal_cv_set_int32(cv, nqptg, FC_FLASH_ATTN_EXT_BLK + 24);
1093
+ ggml_metal_cv_set_int32(cv, ncpsg, FC_FLASH_ATTN_EXT_BLK + 25);
1094
+
1095
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
1096
+
1097
+ ggml_metal_cv_free(cv);
1098
+
1099
+ return res;
1100
+ }
1101
+
1102
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_flash_attn_ext(
1103
+ ggml_metal_library_t lib,
1104
+ const ggml_tensor * op,
1105
+ bool has_mask,
1106
+ bool has_sinks,
1107
+ bool has_bias,
1108
+ bool has_scap,
1109
+ bool has_kvpad,
1110
+ int32_t nsg) {
1111
+ assert(op->op == GGML_OP_FLASH_ATTN_EXT);
1112
+
1113
+ char base[256];
1114
+ char name[256];
1115
+
1116
+ const int32_t dk = (int32_t) op->src[1]->ne[0];
1117
+ const int32_t dv = (int32_t) op->src[2]->ne[0];
1118
+
1119
+ const int32_t ns10 = op->src[1]->nb[1]/op->src[1]->nb[0];
1120
+ const int32_t ns20 = op->src[2]->nb[1]/op->src[2]->nb[0];
1121
+
1122
+ // do bounds checks for the mask?
1123
+ const bool bc_mask = op->src[3] && (op->src[3]->ne[1] % 8 != 0);
1124
+
1125
+ snprintf(base, 256, "kernel_%s_%s_dk%d_dv%d",
1126
+ "flash_attn_ext",
1127
+ ggml_type_name(op->src[1]->type),
1128
+ dk,
1129
+ dv);
1130
+
1131
+ snprintf(name, 256, "%s_mask=%d_sinks=%d_bias=%d_scap=%d_kvpad=%d_bcm=%d_ns10=%d_ns20=%d_nsg=%d",
1132
+ base,
1133
+ has_mask,
1134
+ has_sinks,
1135
+ has_bias,
1136
+ has_scap,
1137
+ has_kvpad,
1138
+ bc_mask,
1139
+ ns10,
1140
+ ns20,
1141
+ nsg);
1142
+
1143
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1144
+ if (res) {
1145
+ return res;
1146
+ }
1147
+
1148
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
1149
+
1150
+ ggml_metal_cv_set_bool(cv, has_mask, FC_FLASH_ATTN_EXT + 0);
1151
+ ggml_metal_cv_set_bool(cv, has_sinks, FC_FLASH_ATTN_EXT + 1);
1152
+ ggml_metal_cv_set_bool(cv, has_bias, FC_FLASH_ATTN_EXT + 2);
1153
+ ggml_metal_cv_set_bool(cv, has_scap, FC_FLASH_ATTN_EXT + 3);
1154
+ ggml_metal_cv_set_bool(cv, has_kvpad, FC_FLASH_ATTN_EXT + 4);
1155
+
1156
+ ggml_metal_cv_set_bool(cv, bc_mask, FC_FLASH_ATTN_EXT + 10);
1157
+
1158
+ ggml_metal_cv_set_int32(cv, ns10, FC_FLASH_ATTN_EXT + 20);
1159
+ ggml_metal_cv_set_int32(cv, ns20, FC_FLASH_ATTN_EXT + 21);
1160
+ ggml_metal_cv_set_int32(cv, nsg, FC_FLASH_ATTN_EXT + 22);
1161
+
1162
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
1163
+
1164
+ ggml_metal_cv_free(cv);
1165
+
1166
+ return res;
1167
+ }
1168
+
1169
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_flash_attn_ext_vec(
1170
+ ggml_metal_library_t lib,
1171
+ const ggml_tensor * op,
1172
+ bool has_mask,
1173
+ bool has_sinks,
1174
+ bool has_bias,
1175
+ bool has_scap,
1176
+ bool has_kvpad,
1177
+ int32_t nsg,
1178
+ int32_t nwg) {
1179
+ assert(op->op == GGML_OP_FLASH_ATTN_EXT);
1180
+
1181
+ char base[256];
1182
+ char name[256];
1183
+
1184
+ const int32_t dk = (int32_t) op->src[1]->ne[0];
1185
+ const int32_t dv = (int32_t) op->src[2]->ne[0];
1186
+
1187
+ const int32_t ns10 = op->src[1]->nb[1]/op->src[1]->nb[0];
1188
+ const int32_t ns20 = op->src[2]->nb[1]/op->src[2]->nb[0];
1189
+
1190
+ snprintf(base, 256, "kernel_%s_%s_dk%d_dv%d",
1191
+ "flash_attn_ext_vec",
1192
+ ggml_type_name(op->src[1]->type),
1193
+ dk,
1194
+ dv);
1195
+
1196
+ snprintf(name, 256, "%s_mask=%d_sink=%d_bias=%d_scap=%d_kvpad=%d_ns10=%d_ns20=%d_nsg=%d_nwg=%d",
1197
+ base,
1198
+ has_mask,
1199
+ has_sinks,
1200
+ has_bias,
1201
+ has_scap,
1202
+ has_kvpad,
1203
+ ns10,
1204
+ ns20,
1205
+ nsg, nwg);
1206
+
1207
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1208
+ if (res) {
1209
+ return res;
1210
+ }
1211
+
1212
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
1213
+
1214
+ ggml_metal_cv_set_bool(cv, has_mask, FC_FLASH_ATTN_EXT_VEC + 0);
1215
+ ggml_metal_cv_set_bool(cv, has_sinks, FC_FLASH_ATTN_EXT_VEC + 1);
1216
+ ggml_metal_cv_set_bool(cv, has_bias, FC_FLASH_ATTN_EXT_VEC + 2);
1217
+ ggml_metal_cv_set_bool(cv, has_scap, FC_FLASH_ATTN_EXT_VEC + 3);
1218
+ ggml_metal_cv_set_bool(cv, has_kvpad, FC_FLASH_ATTN_EXT_VEC + 4);
1219
+
1220
+ ggml_metal_cv_set_int32(cv, ns10, FC_FLASH_ATTN_EXT_VEC + 20);
1221
+ ggml_metal_cv_set_int32(cv, ns20, FC_FLASH_ATTN_EXT_VEC + 21);
1222
+ ggml_metal_cv_set_int32(cv, nsg, FC_FLASH_ATTN_EXT_VEC + 22);
1223
+ ggml_metal_cv_set_int32(cv, nwg, FC_FLASH_ATTN_EXT_VEC + 23);
1224
+
1225
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
1226
+
1227
+ ggml_metal_cv_free(cv);
1228
+
1229
+ return res;
1230
+ }
1231
+
1232
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_flash_attn_ext_vec_reduce(
1233
+ ggml_metal_library_t lib,
1234
+ const ggml_tensor * op,
1235
+ int32_t dv,
1236
+ int32_t nwg) {
1237
+ assert(op->op == GGML_OP_FLASH_ATTN_EXT);
1238
+
1239
+ char base[256];
1240
+ char name[256];
1241
+
1242
+ snprintf(base, 256, "kernel_flash_attn_ext_vec_reduce");
1243
+ snprintf(name, 256, "%s_dv=%d_nwg=%d", base, dv, nwg);
1244
+
1245
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1246
+ if (res) {
1247
+ return res;
1248
+ }
1249
+
1250
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
1251
+
1252
+ ggml_metal_cv_set_int32(cv, dv, FC_FLASH_ATTN_EXT_VEC_REDUCE + 0);
1253
+ ggml_metal_cv_set_int32(cv, nwg, FC_FLASH_ATTN_EXT_VEC_REDUCE + 1);
1254
+
1255
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
1256
+
1257
+ ggml_metal_cv_free(cv);
1258
+
1259
+ return res;
1260
+
1261
+ GGML_UNUSED(op);
1262
+ }
1263
+
1264
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_bin(
1265
+ ggml_metal_library_t lib,
1266
+ ggml_op op,
1267
+ int32_t n_fuse,
1268
+ bool row) {
1269
+ char base[256];
1270
+ char name[256];
1271
+
1272
+ const char * op_str = "undefined";
1273
+ switch (op) {
1274
+ case GGML_OP_ADD: op_str = "add"; break;
1275
+ case GGML_OP_SUB: op_str = "sub"; break;
1276
+ case GGML_OP_MUL: op_str = "mul"; break;
1277
+ case GGML_OP_DIV: op_str = "div"; break;
1278
+ default: GGML_ABORT("fatal error");
1279
+ };
1280
+
1281
+ if (row) {
1282
+ snprintf(base, 256, "kernel_%s_row_c4_fuse_%d", op_str, n_fuse);
1283
+ } else {
1284
+ snprintf(base, 256, "kernel_%s_fuse_%d", op_str, n_fuse);
1285
+ }
1286
+
1287
+ snprintf(name, 256, "%s", base);
1288
+
1289
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1290
+ if (res) {
1291
+ return res;
1292
+ }
1293
+
1294
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1295
+
1296
+ return res;
1297
+ }
1298
+
1299
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_l2_norm(ggml_metal_library_t lib, const ggml_tensor * op) {
1300
+ assert(op->op == GGML_OP_L2_NORM);
1301
+
1302
+ GGML_ASSERT(op->src[0]->ne[0] % 4 == 0);
1303
+ GGML_ASSERT(ggml_is_contiguous_1(op->src[0]));
1304
+
1305
+ char base[256];
1306
+ char name[256];
1307
+
1308
+ snprintf(base, 256, "kernel_l2_norm_f32");
1309
+ snprintf(name, 256, "%s", base);
1310
+
1311
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1312
+ if (res) {
1313
+ return res;
1314
+ }
1315
+
1316
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1317
+
1318
+ ggml_metal_pipeline_set_smem(res, 32*sizeof(float));
1319
+
1320
+ return res;
1321
+ }
1322
+
1323
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_group_norm(ggml_metal_library_t lib, const ggml_tensor * op) {
1324
+ assert(op->op == GGML_OP_GROUP_NORM);
1325
+
1326
+ GGML_ASSERT(ggml_is_contiguous(op->src[0]));
1327
+
1328
+ char base[256];
1329
+ char name[256];
1330
+
1331
+ snprintf(base, 256, "kernel_group_norm_f32");
1332
+ snprintf(name, 256, "%s", base);
1333
+
1334
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1335
+ if (res) {
1336
+ return res;
1337
+ }
1338
+
1339
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1340
+
1341
+ ggml_metal_pipeline_set_smem(res, 32*sizeof(float));
1342
+
1343
+ return res;
1344
+ }
1345
+
1346
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_norm(ggml_metal_library_t lib, const ggml_tensor * op, int n_fuse) {
1347
+ assert(op->op == GGML_OP_NORM || op->op == GGML_OP_RMS_NORM);
1348
+
1349
+ GGML_ASSERT(ggml_is_contiguous_rows(op->src[0]));
1350
+
1351
+ char base[256];
1352
+ char name[256];
1353
+
1354
+ const char * suffix = "";
1355
+ if (op->ne[0] % 4 == 0) {
1356
+ suffix = "_4";
1357
+ }
1358
+
1359
+ switch (op->op) {
1360
+ case GGML_OP_NORM:
1361
+ switch (n_fuse) {
1362
+ case 1: snprintf(base, 256, "kernel_norm_f32%s", suffix); break;
1363
+ case 2: snprintf(base, 256, "kernel_norm_mul_f32%s", suffix); break;
1364
+ case 3: snprintf(base, 256, "kernel_norm_mul_add_f32%s", suffix); break;
1365
+ default: GGML_ABORT("fatal error");
1366
+ } break;
1367
+ case GGML_OP_RMS_NORM:
1368
+ switch (n_fuse) {
1369
+ case 1: snprintf(base, 256, "kernel_rms_norm_f32%s", suffix); break;
1370
+ case 2: snprintf(base, 256, "kernel_rms_norm_mul_f32%s", suffix); break;
1371
+ case 3: snprintf(base, 256, "kernel_rms_norm_mul_add_f32%s", suffix); break;
1372
+ default: GGML_ABORT("fatal error");
1373
+ } break;
1374
+ default: GGML_ABORT("fatal error");
1375
+ }
1376
+
1377
+ snprintf(name, 256, "%s", base);
1378
+
1379
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1380
+ if (res) {
1381
+ return res;
1382
+ }
1383
+
1384
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1385
+
1386
+ ggml_metal_pipeline_set_smem(res, 32*sizeof(float));
1387
+
1388
+ return res;
1389
+ }
1390
+
1391
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_rope(ggml_metal_library_t lib, const ggml_tensor * op) {
1392
+ assert(op->op == GGML_OP_ROPE);
1393
+
1394
+ char base[256];
1395
+ char name[256];
1396
+
1397
+ const int mode = ((const int32_t *) op->op_params)[2];
1398
+
1399
+ const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
1400
+ const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
1401
+ const bool is_imrope = mode == GGML_ROPE_TYPE_IMROPE;
1402
+ const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
1403
+
1404
+ if (is_neox) {
1405
+ snprintf(base, 256, "kernel_rope_neox_%s", ggml_type_name(op->src[0]->type));
1406
+ } else if ((is_mrope || is_imrope) && !is_vision) {
1407
+ GGML_ASSERT(op->src[1]->ne[0]*4 >= op->src[0]->ne[2]); // need at least 4 pos per token
1408
+ snprintf(base, 256, "kernel_rope_multi_%s", ggml_type_name(op->src[0]->type));
1409
+ } else if (is_vision) {
1410
+ GGML_ASSERT(op->src[1]->ne[0]*4 >= op->src[0]->ne[2]); // need at least 4 pos per token
1411
+ snprintf(base, 256, "kernel_rope_vision_%s", ggml_type_name(op->src[0]->type));
1412
+ } else {
1413
+ snprintf(base, 256, "kernel_rope_norm_%s", ggml_type_name(op->src[0]->type));
1414
+ }
1415
+
1416
+ snprintf(name, 256, "%s_imrope=%d", base, is_imrope ? 1 : 0);
1417
+
1418
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1419
+ if (res) {
1420
+ return res;
1421
+ }
1422
+
1423
+ ggml_metal_cv_t cv = ggml_metal_cv_init();
1424
+
1425
+ ggml_metal_cv_set_bool(cv, is_imrope, FC_ROPE + 0);
1426
+
1427
+ res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
1428
+
1429
+ ggml_metal_cv_free(cv);
1430
+
1431
+ return res;
1432
+ }
1433
+
1434
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_im2col(ggml_metal_library_t lib, const ggml_tensor * op) {
1435
+ assert(op->op == GGML_OP_IM2COL);
1436
+
1437
+ GGML_ASSERT(ggml_is_contiguous(op->src[1]));
1438
+ GGML_ASSERT(op->src[1]->type == GGML_TYPE_F32);
1439
+ GGML_ASSERT(op->type == GGML_TYPE_F16 || op->type == GGML_TYPE_F32);
1440
+
1441
+ char base[256];
1442
+ char name[256];
1443
+
1444
+ snprintf(base, 256, "kernel_im2col_%s", ggml_type_name(op->type));
1445
+ snprintf(name, 256, "%s", base);
1446
+
1447
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1448
+ if (res) {
1449
+ return res;
1450
+ }
1451
+
1452
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1453
+
1454
+ return res;
1455
+ }
1456
+
1457
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_transpose_1d(ggml_metal_library_t lib, const ggml_tensor * op) {
1458
+ assert(op->op == GGML_OP_CONV_TRANSPOSE_1D);
1459
+
1460
+ GGML_ASSERT(ggml_is_contiguous(op->src[0]));
1461
+ GGML_ASSERT(ggml_is_contiguous(op->src[1]));
1462
+ GGML_ASSERT(op->src[0]->type == GGML_TYPE_F16 || op->src[0]->type == GGML_TYPE_F32);
1463
+ GGML_ASSERT(op->src[1]->type == GGML_TYPE_F32);
1464
+ GGML_ASSERT(op->type == GGML_TYPE_F32);
1465
+
1466
+ char base[256];
1467
+ char name[256];
1468
+
1469
+ snprintf(base, 256, "kernel_conv_transpose_1d_%s_%s", ggml_type_name(op->src[0]->type), ggml_type_name(op->src[1]->type));
1470
+ snprintf(name, 256, "%s", base);
1471
+
1472
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1473
+ if (res) {
1474
+ return res;
1475
+ }
1476
+
1477
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1478
+
1479
+ return res;
1480
+ }
1481
+
1482
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_transpose_2d(ggml_metal_library_t lib, const ggml_tensor * op) {
1483
+ assert(op->op == GGML_OP_CONV_TRANSPOSE_2D);
1484
+
1485
+ GGML_ASSERT(ggml_is_contiguous(op->src[0]));
1486
+ GGML_ASSERT(ggml_is_contiguous(op->src[1]));
1487
+ GGML_ASSERT(op->src[0]->type == GGML_TYPE_F16 || op->src[0]->type == GGML_TYPE_F32);
1488
+ GGML_ASSERT(op->src[1]->type == GGML_TYPE_F32);
1489
+ GGML_ASSERT(op->type == GGML_TYPE_F32);
1490
+
1491
+ char base[256];
1492
+ char name[256];
1493
+
1494
+ snprintf(base, 256, "kernel_conv_transpose_2d_%s_%s", ggml_type_name(op->src[0]->type), ggml_type_name(op->src[1]->type));
1495
+ snprintf(name, 256, "%s", base);
1496
+
1497
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1498
+ if (res) {
1499
+ return res;
1500
+ }
1501
+
1502
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1503
+
1504
+ return res;
1505
+ }
1506
+
1507
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_2d(ggml_metal_library_t lib, const ggml_tensor * op) {
1508
+ assert(op->op == GGML_OP_CONV_2D);
1509
+
1510
+ GGML_ASSERT(ggml_is_contiguous(op->src[0]));
1511
+ GGML_ASSERT(op->src[0]->type == GGML_TYPE_F16 || op->src[0]->type == GGML_TYPE_F32);
1512
+ GGML_ASSERT(op->src[1]->type == GGML_TYPE_F32);
1513
+ GGML_ASSERT(op->type == GGML_TYPE_F32);
1514
+
1515
+ char base[256];
1516
+ char name[256];
1517
+
1518
+ snprintf(base, 256, "kernel_conv_2d_%s_%s", ggml_type_name(op->src[0]->type), ggml_type_name(op->src[1]->type));
1519
+ snprintf(name, 256, "%s", base);
1520
+
1521
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1522
+ if (res) {
1523
+ return res;
1524
+ }
1525
+
1526
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1527
+
1528
+ return res;
1529
+ }
1530
+
1531
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_upscale(ggml_metal_library_t lib, const ggml_tensor * op) {
1532
+ assert(op->op == GGML_OP_UPSCALE);
1533
+
1534
+ char base[256];
1535
+ char name[256];
1536
+
1537
+ snprintf(base, 256, "kernel_upscale_%s", ggml_type_name(op->src[0]->type));
1538
+ snprintf(name, 256, "%s", base);
1539
+
1540
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1541
+ if (res) {
1542
+ return res;
1543
+ }
1544
+
1545
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1546
+
1547
+ return res;
1548
+ }
1549
+
1550
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_pad(ggml_metal_library_t lib, const ggml_tensor * op) {
1551
+ assert(op->op == GGML_OP_PAD);
1552
+
1553
+ char base[256];
1554
+ char name[256];
1555
+
1556
+ snprintf(base, 256, "kernel_pad_%s", ggml_type_name(op->src[0]->type));
1557
+ snprintf(name, 256, "%s", base);
1558
+
1559
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1560
+ if (res) {
1561
+ return res;
1562
+ }
1563
+
1564
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1565
+
1566
+ return res;
1567
+ }
1568
+
1569
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_pad_reflect_1d(ggml_metal_library_t lib, const ggml_tensor * op) {
1570
+ assert(op->op == GGML_OP_PAD_REFLECT_1D);
1571
+
1572
+ char base[256];
1573
+ char name[256];
1574
+
1575
+ snprintf(base, 256, "kernel_pad_reflect_1d_%s", ggml_type_name(op->src[0]->type));
1576
+ snprintf(name, 256, "%s", base);
1577
+
1578
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1579
+ if (res) {
1580
+ return res;
1581
+ }
1582
+
1583
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1584
+
1585
+ return res;
1586
+ }
1587
+
1588
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_arange(ggml_metal_library_t lib, const ggml_tensor * op) {
1589
+ assert(op->op == GGML_OP_ARANGE);
1590
+
1591
+ char base[256];
1592
+ char name[256];
1593
+
1594
+ snprintf(base, 256, "kernel_arange_%s", ggml_type_name(op->type));
1595
+ snprintf(name, 256, "%s", base);
1596
+
1597
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1598
+ if (res) {
1599
+ return res;
1600
+ }
1601
+
1602
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1603
+
1604
+ return res;
1605
+ }
1606
+
1607
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_timestep_embedding(ggml_metal_library_t lib, const ggml_tensor * op) {
1608
+ assert(op->op == GGML_OP_TIMESTEP_EMBEDDING);
1609
+
1610
+ char base[256];
1611
+ char name[256];
1612
+
1613
+ snprintf(base, 256, "kernel_timestep_embedding_%s", ggml_type_name(op->src[0]->type));
1614
+ snprintf(name, 256, "%s", base);
1615
+
1616
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1617
+ if (res) {
1618
+ return res;
1619
+ }
1620
+
1621
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1622
+
1623
+ return res;
1624
+ }
1625
+
1626
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_opt_step_adamw(ggml_metal_library_t lib, const ggml_tensor * op) {
1627
+ assert(op->op == GGML_OP_OPT_STEP_ADAMW);
1628
+
1629
+ char base[256];
1630
+ char name[256];
1631
+
1632
+ snprintf(base, 256, "kernel_opt_step_adamw_%s", ggml_type_name(op->src[0]->type));
1633
+ snprintf(name, 256, "%s", base);
1634
+
1635
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1636
+ if (res) {
1637
+ return res;
1638
+ }
1639
+
1640
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1641
+
1642
+ return res;
1643
+ }
1644
+
1645
+ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_opt_step_sgd(ggml_metal_library_t lib, const ggml_tensor * op) {
1646
+ assert(op->op == GGML_OP_OPT_STEP_SGD);
1647
+
1648
+ char base[256];
1649
+ char name[256];
1650
+
1651
+ snprintf(base, 256, "kernel_opt_step_sgd_%s", ggml_type_name(op->src[0]->type));
1652
+ snprintf(name, 256, "%s", base);
1653
+
1654
+ ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
1655
+ if (res) {
1656
+ return res;
1657
+ }
1658
+
1659
+ res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
1660
+
1661
+ return res;
1662
+ }