@novastera-oss/llamarn 0.4.0 → 0.4.3-beta4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (979) hide show
  1. package/RNLlamaCpp.podspec +4 -1
  2. package/android/CMakeLists.txt +13 -3
  3. package/android/src/main/cpp/include/llama.h +44 -21
  4. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  13. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  14. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  15. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  20. package/cpp/LlamaCppModel.cpp +2 -10
  21. package/cpp/SystemUtils.cpp +3 -7
  22. package/cpp/build-info.cpp +2 -2
  23. package/cpp/llama.cpp/CMakeLists.txt +12 -0
  24. package/cpp/llama.cpp/CODEOWNERS +116 -10
  25. package/cpp/llama.cpp/CONTRIBUTING.md +30 -3
  26. package/cpp/llama.cpp/README.md +13 -5
  27. package/cpp/llama.cpp/build-xcframework.sh +5 -0
  28. package/cpp/llama.cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
  29. package/cpp/llama.cpp/common/CMakeLists.txt +12 -2
  30. package/cpp/llama.cpp/common/arg.cpp +303 -795
  31. package/cpp/llama.cpp/common/arg.h +2 -3
  32. package/cpp/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
  33. package/cpp/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
  34. package/cpp/llama.cpp/common/chat-parser.cpp +156 -15
  35. package/cpp/llama.cpp/common/chat-parser.h +13 -0
  36. package/cpp/llama.cpp/common/chat.cpp +1147 -88
  37. package/cpp/llama.cpp/common/chat.h +16 -3
  38. package/cpp/llama.cpp/common/common.cpp +70 -15
  39. package/cpp/llama.cpp/common/common.h +57 -19
  40. package/cpp/llama.cpp/common/download.cpp +1072 -0
  41. package/cpp/llama.cpp/common/download.h +55 -0
  42. package/cpp/llama.cpp/common/http.h +73 -0
  43. package/cpp/llama.cpp/common/json-partial.cpp +70 -2
  44. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +61 -22
  45. package/cpp/llama.cpp/common/json-schema-to-grammar.h +2 -0
  46. package/cpp/llama.cpp/common/log.cpp +59 -2
  47. package/cpp/llama.cpp/common/log.h +12 -4
  48. package/cpp/llama.cpp/common/sampling.cpp +84 -8
  49. package/cpp/llama.cpp/common/sampling.h +3 -1
  50. package/cpp/llama.cpp/common/speculative.cpp +1 -1
  51. package/cpp/llama.cpp/convert_hf_to_gguf.py +1608 -233
  52. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +6 -1
  53. package/cpp/llama.cpp/convert_lora_to_gguf.py +37 -5
  54. package/cpp/llama.cpp/ggml/CMakeLists.txt +47 -28
  55. package/cpp/llama.cpp/ggml/include/ggml-backend.h +19 -1
  56. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +1 -1
  57. package/cpp/llama.cpp/ggml/include/ggml-hexagon.h +19 -0
  58. package/cpp/llama.cpp/ggml/include/ggml-metal.h +1 -6
  59. package/cpp/llama.cpp/ggml/include/ggml-rpc.h +7 -9
  60. package/cpp/llama.cpp/ggml/include/ggml-zdnn.h +2 -1
  61. package/cpp/llama.cpp/ggml/include/ggml.h +199 -6
  62. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +38 -0
  63. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +299 -130
  64. package/cpp/llama.cpp/ggml/src/ggml-backend-impl.h +4 -4
  65. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +21 -5
  66. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +99 -2
  67. package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +1 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +1 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
  70. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +138 -47
  71. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +1584 -1773
  72. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +201 -317
  73. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +146 -187
  74. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +771 -713
  75. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +135 -77
  76. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +5 -2
  77. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
  78. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +16 -17
  79. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +318 -145
  80. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +155 -60
  82. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +8 -8
  83. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +0 -1
  84. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +14 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +10 -9
  86. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +108 -64
  87. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +14 -4
  88. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +530 -87
  89. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +37 -45
  90. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +349 -127
  91. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +947 -1218
  92. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +5 -4
  93. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +143 -29
  94. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +82 -76
  95. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
  100. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +7 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +233 -28
  102. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +326 -66
  103. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +12 -3
  104. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cu +102 -6
  105. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cu +110 -76
  106. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +167 -38
  107. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d.cu +6 -11
  108. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +12 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
  110. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +245 -151
  111. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cuh +1 -5
  112. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +341 -289
  113. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cu +49 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cuh +1233 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec.cuh +586 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +6 -6
  117. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +123 -220
  119. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +41 -39
  120. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +715 -45
  121. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +150 -0
  122. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cuh +1 -0
  123. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +321 -24
  124. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmf.cu +93 -351
  125. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmf.cuh +828 -1
  126. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmid.cu +164 -0
  127. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmid.cuh +5 -0
  128. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +3 -166
  129. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1 -1
  130. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvf.cu +371 -78
  131. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvf.cuh +3 -2
  132. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cu +279 -147
  133. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh +1 -1
  134. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +97 -85
  135. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cu +46 -23
  136. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad_reflect_1d.cu +63 -54
  137. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +12 -10
  138. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +192 -77
  139. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cuh +2 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +10 -9
  141. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +137 -75
  142. package/cpp/llama.cpp/ggml/src/ggml-cuda/set.cu +39 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-cuda/set.cuh +7 -0
  144. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  152. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  153. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
  154. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
  161. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
  164. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
  166. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
  167. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
  173. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
  174. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
  175. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
  176. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
  177. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
  178. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
  179. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
  180. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
  181. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
  182. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
  183. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
  184. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
  185. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
  186. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
  187. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
  188. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
  189. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +40 -19
  190. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
  191. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
  192. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
  193. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
  194. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
  195. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
  196. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
  197. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
  198. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
  199. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
  200. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
  201. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
  202. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
  203. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
  204. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
  205. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
  206. package/cpp/llama.cpp/ggml/src/ggml-cuda/topk-moe.cu +336 -0
  207. package/cpp/llama.cpp/ggml/src/ggml-cuda/topk-moe.cuh +16 -0
  208. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cu +3 -3
  209. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +105 -11
  210. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +36 -0
  211. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +87 -6
  212. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +28 -12
  213. package/cpp/llama.cpp/ggml/src/ggml-hexagon/CMakeLists.txt +68 -0
  214. package/cpp/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3807 -0
  215. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +40 -0
  216. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/act-ops.c +442 -0
  217. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
  218. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  219. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +40 -0
  220. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.c +69 -0
  221. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.h +119 -0
  222. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +156 -0
  223. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +64 -0
  224. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  225. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-exp.c +93 -0
  226. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.c +60 -0
  227. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
  228. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.c +960 -0
  229. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +1032 -0
  230. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/main.c +829 -0
  231. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2223 -0
  232. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
  233. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +418 -0
  234. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
  235. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +255 -0
  236. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
  237. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  238. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.c +448 -0
  239. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.h +220 -0
  240. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +8 -13
  241. package/cpp/llama.cpp/ggml/src/ggml-impl.h +110 -12
  242. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +6 -5
  243. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
  244. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  245. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
  246. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.m +599 -0
  247. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1662 -0
  248. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.h +251 -0
  249. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.m +1527 -0
  250. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +244 -39
  251. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +3844 -0
  252. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.h +90 -0
  253. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.cpp +723 -0
  254. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +3453 -1907
  255. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +3 -1
  256. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +10 -0
  257. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1331 -109
  258. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +126 -0
  259. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +31 -4
  260. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +35 -7
  261. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +31 -4
  262. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  263. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  264. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
  265. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  266. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
  267. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
  268. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  269. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  270. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  271. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  272. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  273. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  274. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  275. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
  276. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
  277. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
  278. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +123 -10
  279. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +2 -2
  280. package/cpp/llama.cpp/ggml/src/ggml-quants.c +1 -0
  281. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +341 -161
  282. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
  283. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +6 -5
  284. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +74 -15
  285. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +50 -30
  286. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +10 -4
  287. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +166 -99
  288. package/cpp/llama.cpp/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  289. package/cpp/llama.cpp/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  290. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +72 -94
  291. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +67 -49
  292. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +21 -31
  293. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +252 -316
  294. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +6 -2
  295. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +9 -6
  296. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +359 -142
  297. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  298. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  299. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +80 -60
  300. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +201 -132
  301. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +230 -55
  302. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.hpp +2 -0
  303. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad.cpp +97 -0
  304. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad.hpp +24 -0
  305. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.cpp +72 -0
  306. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.hpp +8 -0
  307. package/cpp/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
  308. package/cpp/llama.cpp/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  309. package/cpp/llama.cpp/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  310. package/cpp/llama.cpp/ggml/src/ggml-sycl/roll.cpp +122 -0
  311. package/cpp/llama.cpp/ggml/src/ggml-sycl/roll.hpp +20 -0
  312. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +50 -41
  313. package/cpp/llama.cpp/ggml/src/ggml-sycl/set.cpp +73 -0
  314. package/cpp/llama.cpp/ggml/src/ggml-sycl/set.hpp +5 -0
  315. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +45 -36
  316. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +330 -165
  317. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +4 -0
  318. package/cpp/llama.cpp/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
  319. package/cpp/llama.cpp/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  320. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +12 -6
  321. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +16 -12
  322. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
  323. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4184 -2159
  324. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  325. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
  326. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
  327. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  328. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
  329. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  330. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
  331. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
  332. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  333. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  334. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
  335. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
  336. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
  337. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
  338. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +53 -30
  339. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
  340. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
  341. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
  342. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +13 -6
  343. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  344. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
  345. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
  346. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
  347. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +138 -2
  348. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
  349. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
  350. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  351. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
  352. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +2 -2
  353. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
  354. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +3 -2
  355. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +7 -6
  356. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +5 -3
  357. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
  358. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
  359. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
  360. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
  361. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  362. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
  363. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
  364. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
  365. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
  366. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
  367. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
  368. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  369. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
  370. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
  371. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
  372. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -2
  373. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  374. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +52 -14
  375. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +50 -12
  376. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +61 -12
  377. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +54 -12
  378. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +5 -1
  379. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  380. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
  381. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
  382. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
  383. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
  384. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
  385. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
  386. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +10 -2
  387. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +21 -12
  388. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +28 -18
  389. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
  390. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
  391. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
  392. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
  393. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +15 -7
  394. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +125 -0
  395. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
  396. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
  397. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
  398. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
  399. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +1 -1
  400. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +229 -0
  401. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +33 -0
  402. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +1 -1
  403. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +1 -1
  404. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +1 -1
  405. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +1 -1
  406. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +1 -1
  407. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +1 -1
  408. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +1 -1
  409. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
  410. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
  411. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +3 -5
  412. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +1 -1
  413. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +3 -5
  414. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +3 -5
  415. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +1 -1
  416. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
  417. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +106 -634
  418. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +118 -9
  419. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +556 -0
  420. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +70 -0
  421. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +77 -214
  422. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +589 -0
  423. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
  424. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
  425. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  426. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
  427. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
  428. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
  429. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +25 -4
  430. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
  431. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +55 -5
  432. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
  433. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
  434. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
  435. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
  436. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +45 -3
  437. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
  438. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
  439. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
  440. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +227 -0
  441. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
  442. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +5 -52
  443. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +5 -35
  444. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +5 -35
  445. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +27 -0
  446. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +5 -41
  447. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  448. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
  449. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
  450. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
  451. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
  452. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
  453. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
  454. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +6 -2
  455. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  456. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
  457. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
  458. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
  459. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +140 -0
  460. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  461. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
  462. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +1 -1
  463. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
  464. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
  465. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
  466. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +5 -4
  467. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +171 -0
  468. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  469. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +79 -29
  470. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -12
  471. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +471 -196
  472. package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +8 -0
  473. package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1690 -383
  474. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
  475. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
  476. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
  477. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
  478. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +57 -10
  479. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
  480. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  481. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +25 -912
  482. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
  483. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
  484. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
  485. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
  486. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  487. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  488. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
  489. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/{set_rows.wgsl → set_rows.tmpl.wgsl} +38 -8
  490. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  491. package/cpp/llama.cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  492. package/cpp/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +96 -314
  493. package/cpp/llama.cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  494. package/cpp/llama.cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  495. package/cpp/llama.cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  496. package/cpp/llama.cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  497. package/cpp/llama.cpp/ggml/src/ggml.c +440 -17
  498. package/cpp/llama.cpp/ggml/src/gguf.cpp +104 -29
  499. package/cpp/llama.cpp/gguf-py/gguf/constants.py +363 -13
  500. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +64 -0
  501. package/cpp/llama.cpp/gguf-py/gguf/lazy.py +8 -3
  502. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_convert_endian.py +6 -0
  503. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +156 -18
  504. package/cpp/llama.cpp/gguf-py/gguf/utility.py +80 -0
  505. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +4 -4
  506. package/cpp/llama.cpp/include/llama.h +44 -21
  507. package/cpp/llama.cpp/media/llama1-icon-transparent.png +0 -0
  508. package/cpp/llama.cpp/media/llama1-icon-transparent.svg +77 -0
  509. package/cpp/llama.cpp/media/llama1-icon.png +0 -0
  510. package/cpp/llama.cpp/media/llama1-icon.svg +87 -0
  511. package/cpp/llama.cpp/requirements/requirements-all.txt +2 -0
  512. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -3
  513. package/cpp/llama.cpp/requirements/requirements-convert_legacy_llama.txt +3 -1
  514. package/cpp/llama.cpp/requirements/requirements-tool_bench.txt +1 -1
  515. package/cpp/llama.cpp/src/CMakeLists.txt +101 -0
  516. package/cpp/llama.cpp/src/llama-adapter.cpp +33 -0
  517. package/cpp/llama.cpp/src/llama-adapter.h +3 -0
  518. package/cpp/llama.cpp/src/llama-arch.cpp +344 -14
  519. package/cpp/llama.cpp/src/llama-arch.h +50 -0
  520. package/cpp/llama.cpp/src/llama-batch.cpp +63 -31
  521. package/cpp/llama.cpp/src/llama-batch.h +13 -2
  522. package/cpp/llama.cpp/src/llama-chat.cpp +85 -3
  523. package/cpp/llama.cpp/src/llama-chat.h +4 -0
  524. package/cpp/llama.cpp/src/llama-context.cpp +300 -45
  525. package/cpp/llama.cpp/src/llama-context.h +16 -6
  526. package/cpp/llama.cpp/src/llama-cparams.h +2 -1
  527. package/cpp/llama.cpp/src/llama-grammar.cpp +17 -9
  528. package/cpp/llama.cpp/src/llama-graph.cpp +226 -64
  529. package/cpp/llama.cpp/src/llama-graph.h +27 -5
  530. package/cpp/llama.cpp/src/llama-hparams.cpp +53 -2
  531. package/cpp/llama.cpp/src/llama-hparams.h +48 -8
  532. package/cpp/llama.cpp/src/llama-impl.cpp +3 -3
  533. package/cpp/llama.cpp/src/llama-impl.h +2 -0
  534. package/cpp/llama.cpp/src/llama-kv-cache-iswa.cpp +13 -3
  535. package/cpp/llama.cpp/src/llama-kv-cache-iswa.h +2 -0
  536. package/cpp/llama.cpp/src/llama-kv-cache.cpp +120 -62
  537. package/cpp/llama.cpp/src/llama-kv-cache.h +13 -4
  538. package/cpp/llama.cpp/src/llama-kv-cells.h +44 -2
  539. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +19 -9
  540. package/cpp/llama.cpp/src/llama-memory-hybrid.h +2 -0
  541. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +38 -17
  542. package/cpp/llama.cpp/src/llama-memory-recurrent.h +5 -2
  543. package/cpp/llama.cpp/src/llama-memory.h +3 -0
  544. package/cpp/llama.cpp/src/llama-model-loader.cpp +2 -0
  545. package/cpp/llama.cpp/src/llama-model.cpp +1070 -12614
  546. package/cpp/llama.cpp/src/llama-model.h +40 -4
  547. package/cpp/llama.cpp/src/llama-quant.cpp +14 -6
  548. package/cpp/llama.cpp/src/llama-sampling.cpp +243 -136
  549. package/cpp/llama.cpp/src/llama-vocab.cpp +43 -3
  550. package/cpp/llama.cpp/src/llama-vocab.h +43 -39
  551. package/cpp/llama.cpp/src/llama.cpp +69 -10
  552. package/cpp/llama.cpp/src/models/afmoe.cpp +187 -0
  553. package/cpp/llama.cpp/src/models/apertus.cpp +125 -0
  554. package/cpp/llama.cpp/src/models/arcee.cpp +135 -0
  555. package/cpp/llama.cpp/src/models/arctic.cpp +138 -0
  556. package/cpp/llama.cpp/src/models/arwkv7.cpp +86 -0
  557. package/cpp/llama.cpp/src/models/baichuan.cpp +122 -0
  558. package/cpp/llama.cpp/src/models/bailingmoe.cpp +144 -0
  559. package/cpp/llama.cpp/src/models/bailingmoe2.cpp +135 -0
  560. package/cpp/llama.cpp/src/models/bert.cpp +176 -0
  561. package/cpp/llama.cpp/src/models/bitnet.cpp +160 -0
  562. package/cpp/llama.cpp/src/models/bloom.cpp +101 -0
  563. package/cpp/llama.cpp/src/models/chameleon.cpp +178 -0
  564. package/cpp/llama.cpp/src/models/chatglm.cpp +132 -0
  565. package/cpp/llama.cpp/src/models/codeshell.cpp +111 -0
  566. package/cpp/llama.cpp/src/models/cogvlm.cpp +100 -0
  567. package/cpp/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
  568. package/cpp/llama.cpp/src/models/command-r.cpp +122 -0
  569. package/cpp/llama.cpp/src/models/dbrx.cpp +123 -0
  570. package/cpp/llama.cpp/src/models/deci.cpp +135 -0
  571. package/cpp/llama.cpp/src/models/deepseek.cpp +144 -0
  572. package/cpp/llama.cpp/src/models/deepseek2.cpp +237 -0
  573. package/cpp/llama.cpp/src/models/dots1.cpp +134 -0
  574. package/cpp/llama.cpp/src/models/dream.cpp +105 -0
  575. package/cpp/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
  576. package/cpp/llama.cpp/src/models/ernie4-5.cpp +110 -0
  577. package/cpp/llama.cpp/src/models/exaone.cpp +114 -0
  578. package/cpp/llama.cpp/src/models/exaone4.cpp +123 -0
  579. package/cpp/llama.cpp/src/models/falcon-h1.cpp +113 -0
  580. package/cpp/llama.cpp/src/models/falcon.cpp +120 -0
  581. package/cpp/llama.cpp/src/models/gemma-embedding.cpp +120 -0
  582. package/cpp/llama.cpp/src/models/gemma.cpp +112 -0
  583. package/cpp/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
  584. package/cpp/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
  585. package/cpp/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
  586. package/cpp/llama.cpp/src/models/glm4-moe.cpp +153 -0
  587. package/cpp/llama.cpp/src/models/glm4.cpp +127 -0
  588. package/cpp/llama.cpp/src/models/gpt2.cpp +105 -0
  589. package/cpp/llama.cpp/src/models/gptneox.cpp +144 -0
  590. package/cpp/llama.cpp/src/models/granite-hybrid.cpp +196 -0
  591. package/cpp/llama.cpp/src/models/granite.cpp +211 -0
  592. package/cpp/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
  593. package/cpp/llama.cpp/src/models/grok.cpp +159 -0
  594. package/cpp/llama.cpp/src/models/grovemoe.cpp +141 -0
  595. package/cpp/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
  596. package/cpp/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
  597. package/cpp/llama.cpp/src/models/internlm2.cpp +120 -0
  598. package/cpp/llama.cpp/src/models/jais.cpp +86 -0
  599. package/cpp/llama.cpp/src/models/jamba.cpp +106 -0
  600. package/cpp/llama.cpp/src/models/lfm2.cpp +173 -0
  601. package/cpp/llama.cpp/src/models/llada-moe.cpp +122 -0
  602. package/cpp/llama.cpp/src/models/llada.cpp +99 -0
  603. package/cpp/llama.cpp/src/models/llama-iswa.cpp +174 -0
  604. package/cpp/llama.cpp/src/models/llama.cpp +155 -0
  605. package/cpp/llama.cpp/src/models/mamba.cpp +55 -0
  606. package/cpp/llama.cpp/src/models/minicpm3.cpp +199 -0
  607. package/cpp/llama.cpp/src/models/minimax-m2.cpp +124 -0
  608. package/cpp/llama.cpp/src/models/models.h +485 -0
  609. package/cpp/llama.cpp/src/models/mpt.cpp +126 -0
  610. package/cpp/llama.cpp/src/models/nemotron-h.cpp +121 -0
  611. package/cpp/llama.cpp/src/models/nemotron.cpp +122 -0
  612. package/cpp/llama.cpp/src/models/neo-bert.cpp +104 -0
  613. package/cpp/llama.cpp/src/models/olmo.cpp +121 -0
  614. package/cpp/llama.cpp/src/models/olmo2.cpp +150 -0
  615. package/cpp/llama.cpp/src/models/olmoe.cpp +124 -0
  616. package/cpp/llama.cpp/src/models/openai-moe-iswa.cpp +124 -0
  617. package/cpp/llama.cpp/src/models/openelm.cpp +124 -0
  618. package/cpp/llama.cpp/src/models/orion.cpp +123 -0
  619. package/cpp/llama.cpp/src/models/pangu-embedded.cpp +121 -0
  620. package/cpp/llama.cpp/src/models/phi2.cpp +121 -0
  621. package/cpp/llama.cpp/src/models/phi3.cpp +152 -0
  622. package/cpp/llama.cpp/src/models/plamo.cpp +110 -0
  623. package/cpp/llama.cpp/src/models/plamo2.cpp +316 -0
  624. package/cpp/llama.cpp/src/models/plm.cpp +168 -0
  625. package/cpp/llama.cpp/src/models/qwen.cpp +108 -0
  626. package/cpp/llama.cpp/src/models/qwen2.cpp +117 -0
  627. package/cpp/llama.cpp/src/models/qwen2moe.cpp +151 -0
  628. package/cpp/llama.cpp/src/models/qwen2vl.cpp +117 -0
  629. package/cpp/llama.cpp/src/models/qwen3.cpp +117 -0
  630. package/cpp/llama.cpp/src/models/qwen3moe.cpp +124 -0
  631. package/cpp/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
  632. package/cpp/llama.cpp/src/models/qwen3vl.cpp +141 -0
  633. package/cpp/llama.cpp/src/models/refact.cpp +94 -0
  634. package/cpp/llama.cpp/src/models/rwkv6-base.cpp +162 -0
  635. package/cpp/llama.cpp/src/models/rwkv6.cpp +94 -0
  636. package/cpp/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
  637. package/cpp/llama.cpp/src/models/rwkv7-base.cpp +135 -0
  638. package/cpp/llama.cpp/src/models/rwkv7.cpp +90 -0
  639. package/cpp/llama.cpp/src/models/seed-oss.cpp +124 -0
  640. package/cpp/llama.cpp/src/models/smallthinker.cpp +120 -0
  641. package/cpp/llama.cpp/src/models/smollm3.cpp +128 -0
  642. package/cpp/llama.cpp/src/models/stablelm.cpp +146 -0
  643. package/cpp/llama.cpp/src/models/starcoder.cpp +100 -0
  644. package/cpp/llama.cpp/src/models/starcoder2.cpp +121 -0
  645. package/cpp/llama.cpp/src/models/t5-dec.cpp +166 -0
  646. package/cpp/llama.cpp/src/models/t5-enc.cpp +96 -0
  647. package/cpp/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
  648. package/cpp/llama.cpp/src/models/xverse.cpp +108 -0
  649. package/cpp/llama.cpp/src/unicode.cpp +77 -0
  650. package/cpp/llama.cpp/src/unicode.h +43 -0
  651. package/cpp/llama.cpp/vendor/cpp-httplib/CMakeLists.txt +94 -0
  652. package/cpp/llama.cpp/vendor/cpp-httplib/httplib.cpp +9339 -0
  653. package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +433 -8222
  654. package/cpp/llama.cpp/vendor/cpp-httplib/patch-boringssl.cmake +6 -0
  655. package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +4179 -1900
  656. package/cpp/llama.cpp/vendor/minja/chat-template.hpp +9 -2
  657. package/cpp/llama.cpp/vendor/minja/minja.hpp +101 -22
  658. package/cpp/rn-completion.cpp +3 -27
  659. package/ios/include/chat.h +16 -3
  660. package/ios/include/common/minja/chat-template.hpp +9 -2
  661. package/ios/include/common/minja/minja.hpp +101 -22
  662. package/ios/include/common.h +57 -19
  663. package/ios/include/json-schema-to-grammar.h +2 -0
  664. package/ios/include/llama.h +44 -21
  665. package/ios/include/log.h +12 -4
  666. package/ios/include/sampling.h +3 -1
  667. package/ios/libs/llama.xcframework/Info.plist +20 -20
  668. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  669. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6399 -5557
  670. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +19 -1
  671. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +1 -1
  672. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-metal.h +1 -6
  673. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +199 -6
  674. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +44 -21
  675. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  676. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  677. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6362 -5520
  678. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4813 -4241
  679. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +19 -1
  680. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +1 -1
  681. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +1 -6
  682. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +199 -6
  683. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +44 -21
  684. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  685. package/package.json +10 -4
  686. package/cpp/llama.cpp/ggml/src/ggml-cann/Doxyfile +0 -2579
  687. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -371
  688. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
  689. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -379
  690. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
  691. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -495
  692. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -486
  693. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  694. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  695. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  696. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  697. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  698. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  699. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  700. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  701. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  702. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  703. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  704. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  705. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  706. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  707. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  708. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  709. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  710. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  711. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  712. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  713. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  714. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  715. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  716. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  717. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  718. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  719. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  720. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  721. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  722. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  723. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  724. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  725. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  726. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  727. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  728. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  729. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  730. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  731. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  732. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  733. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  734. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  735. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  736. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  737. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  738. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  739. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  740. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  741. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  742. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  743. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  744. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  745. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  746. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  747. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  748. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  749. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  750. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  751. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  752. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  753. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  754. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  755. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  756. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  757. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  758. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  759. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  760. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  761. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  762. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  763. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  764. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  765. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  766. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  767. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  768. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  769. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  770. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  771. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  772. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  773. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  774. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  775. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  776. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  777. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  778. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  779. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +0 -6886
  780. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +0 -154
  781. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
  782. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
  783. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
  784. package/cpp/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn-impl.h +0 -97
  785. package/cpp/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  786. package/cpp/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  787. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  788. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  789. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  790. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  791. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  792. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  793. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  794. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  795. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  796. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  797. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  798. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  799. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  800. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  801. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  802. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  803. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  804. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  805. package/cpp/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  806. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  807. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  808. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  809. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  810. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  811. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  812. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  813. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  814. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  815. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  816. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  817. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  818. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  819. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  820. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  821. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  822. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  823. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  824. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  825. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  826. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  827. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  828. package/cpp/llama.cpp/models/templates/ByteDance-Seed-OSS.jinja +0 -171
  829. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +0 -202
  830. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +0 -156
  831. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +0 -124
  832. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +0 -152
  833. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +0 -152
  834. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +0 -62
  835. package/cpp/llama.cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +0 -54
  836. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +0 -85
  837. package/cpp/llama.cpp/models/templates/README.md +0 -25
  838. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +0 -1
  839. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +0 -1
  840. package/cpp/llama.cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +0 -57
  841. package/cpp/llama.cpp/models/templates/google-gemma-2-2b-it.jinja +0 -4
  842. package/cpp/llama.cpp/models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja +0 -59
  843. package/cpp/llama.cpp/models/templates/llama-cpp-deepseek-r1.jinja +0 -76
  844. package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +0 -34
  845. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.1.jinja +0 -58
  846. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.2.jinja +0 -287
  847. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +0 -109
  848. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +0 -93
  849. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +0 -109
  850. package/cpp/llama.cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja +0 -8
  851. package/cpp/llama.cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +0 -87
  852. package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +0 -43
  853. package/cpp/llama.cpp/models/templates/openai-gpt-oss-120b.jinja +0 -331
  854. package/cpp/llama.cpp/models/templates/unsloth-mistral-Devstral-Small-2507.jinja +0 -105
  855. package/cpp/llama.cpp/prompts/LLM-questions.txt +0 -49
  856. package/cpp/llama.cpp/prompts/alpaca.txt +0 -1
  857. package/cpp/llama.cpp/prompts/assistant.txt +0 -31
  858. package/cpp/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  859. package/cpp/llama.cpp/prompts/chat-with-bob.txt +0 -7
  860. package/cpp/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  861. package/cpp/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  862. package/cpp/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  863. package/cpp/llama.cpp/prompts/chat.txt +0 -28
  864. package/cpp/llama.cpp/prompts/dan-modified.txt +0 -1
  865. package/cpp/llama.cpp/prompts/dan.txt +0 -1
  866. package/cpp/llama.cpp/prompts/mnemonics.txt +0 -93
  867. package/cpp/llama.cpp/prompts/parallel-questions.txt +0 -43
  868. package/cpp/llama.cpp/prompts/reason-act.txt +0 -18
  869. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  870. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  871. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5524
  872. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4247
  873. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-alloc.h +0 -76
  874. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +0 -354
  875. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-blas.h +0 -25
  876. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +0 -145
  877. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-metal.h +0 -66
  878. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +0 -256
  879. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +0 -2492
  880. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/gguf.h +0 -202
  881. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +0 -1391
  882. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Modules/module.modulemap +0 -17
  883. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Resources/Info.plist +0 -32
  884. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-alloc.h +0 -76
  885. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +0 -354
  886. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-blas.h +0 -25
  887. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +0 -145
  888. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-metal.h +0 -66
  889. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +0 -256
  890. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +0 -2492
  891. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/gguf.h +0 -202
  892. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +0 -1391
  893. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Modules/module.modulemap +0 -17
  894. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Resources/Info.plist +0 -32
  895. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  896. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-alloc.h +0 -76
  897. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +0 -354
  898. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-blas.h +0 -25
  899. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +0 -145
  900. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-metal.h +0 -66
  901. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +0 -256
  902. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +0 -2492
  903. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/gguf.h +0 -202
  904. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +0 -1391
  905. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Modules/module.modulemap +0 -17
  906. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Resources/Info.plist +0 -32
  907. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  908. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  909. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  910. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  911. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5561
  912. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-alloc.h +0 -76
  913. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +0 -354
  914. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-blas.h +0 -25
  915. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +0 -145
  916. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-metal.h +0 -66
  917. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +0 -256
  918. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +0 -2492
  919. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/gguf.h +0 -202
  920. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +0 -1391
  921. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Info.plist +0 -35
  922. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Modules/module.modulemap +0 -17
  923. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  924. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  925. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  926. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5524
  927. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4246
  928. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +0 -76
  929. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +0 -354
  930. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +0 -25
  931. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +0 -145
  932. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +0 -66
  933. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +0 -256
  934. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +0 -2492
  935. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +0 -202
  936. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -1391
  937. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Info.plist +0 -35
  938. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +0 -17
  939. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  940. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  941. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  942. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5558
  943. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-alloc.h +0 -76
  944. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +0 -354
  945. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-blas.h +0 -25
  946. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +0 -145
  947. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-metal.h +0 -66
  948. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +0 -256
  949. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +0 -2492
  950. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/gguf.h +0 -202
  951. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +0 -1391
  952. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Info.plist +0 -32
  953. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Modules/module.modulemap +0 -17
  954. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  955. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  956. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  957. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5520
  958. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4243
  959. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +0 -76
  960. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +0 -354
  961. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +0 -25
  962. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +0 -145
  963. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +0 -66
  964. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +0 -256
  965. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +0 -2492
  966. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +0 -202
  967. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -1391
  968. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Info.plist +0 -32
  969. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +0 -17
  970. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  971. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
  972. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
  973. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
  974. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
  975. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +0 -0
  976. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +0 -0
  977. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
  978. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
  979. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
@@ -83,6 +83,7 @@ extern "C" {
83
83
  LLAMA_ROPE_TYPE_NORM = 0,
84
84
  LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
85
85
  LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE,
86
+ LLAMA_ROPE_TYPE_IMROPE = GGML_ROPE_TYPE_IMROPE,
86
87
  LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION,
87
88
  };
88
89
 
@@ -179,6 +180,14 @@ extern "C" {
179
180
  LLAMA_ATTENTION_TYPE_NON_CAUSAL = 1,
180
181
  };
181
182
 
183
+ enum llama_flash_attn_type {
184
+ LLAMA_FLASH_ATTN_TYPE_AUTO = -1,
185
+ LLAMA_FLASH_ATTN_TYPE_DISABLED = 0,
186
+ LLAMA_FLASH_ATTN_TYPE_ENABLED = 1,
187
+ };
188
+
189
+ LLAMA_API const char * llama_flash_attn_type_name(enum llama_flash_attn_type flash_attn_type);
190
+
182
191
  enum llama_split_mode {
183
192
  LLAMA_SPLIT_MODE_NONE = 0, // single GPU
184
193
  LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
@@ -198,7 +207,7 @@ extern "C" {
198
207
  llama_token_data * data;
199
208
  size_t size;
200
209
  int64_t selected; // this is the index in the data array (i.e. not the token id)
201
- bool sorted;
210
+ bool sorted; // note: do not assume the data is sorted - always check this flag
202
211
  } llama_token_data_array;
203
212
 
204
213
  typedef bool (*llama_progress_callback)(float progress, void * user_data);
@@ -288,6 +297,7 @@ extern "C" {
288
297
  bool use_mlock; // force system to keep model in RAM
289
298
  bool check_tensors; // validate model tensor data
290
299
  bool use_extra_bufts; // use extra buffer types (used for weight repacking)
300
+ bool no_host; // bypass host buffer allowing extra buffers to be used
291
301
  };
292
302
 
293
303
  // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
@@ -303,6 +313,7 @@ extern "C" {
303
313
  enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
304
314
  enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
305
315
  enum llama_attention_type attention_type; // attention type to use for embeddings
316
+ enum llama_flash_attn_type flash_attn_type; // when to enable Flash Attention
306
317
 
307
318
  // ref: https://github.com/ggml-org/llama.cpp/pull/2054
308
319
  float rope_freq_base; // RoPE base frequency, 0 = from model
@@ -329,7 +340,6 @@ extern "C" {
329
340
  // Keep the booleans together and at the end of the struct to avoid misalignment during copy-by-value.
330
341
  bool embeddings; // if true, extract embeddings (together with logits)
331
342
  bool offload_kqv; // offload the KQV ops (including the KV cache) to GPU
332
- bool flash_attn; // use flash attention [EXPERIMENTAL]
333
343
  bool no_perf; // measure performance timings
334
344
  bool op_offload; // offload host tensor operations to device
335
345
  bool swa_full; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
@@ -451,7 +461,11 @@ extern "C" {
451
461
  LLAMA_API bool llama_supports_gpu_offload(void);
452
462
  LLAMA_API bool llama_supports_rpc (void);
453
463
 
464
+ // NOTE: After creating a llama_context, it is recommended to query the actual values using these functions
465
+ // In some cases the requested values via llama_context_params may differ from the actual values used by the context
466
+ // ref: https://github.com/ggml-org/llama.cpp/pull/17046#discussion_r2503085732
454
467
  LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
468
+ LLAMA_API uint32_t llama_n_ctx_seq (const struct llama_context * ctx);
455
469
  LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
456
470
  LLAMA_API uint32_t llama_n_ubatch (const struct llama_context * ctx);
457
471
  LLAMA_API uint32_t llama_n_seq_max (const struct llama_context * ctx);
@@ -472,6 +486,7 @@ extern "C" {
472
486
 
473
487
  LLAMA_API int32_t llama_model_n_ctx_train(const struct llama_model * model);
474
488
  LLAMA_API int32_t llama_model_n_embd (const struct llama_model * model);
489
+ LLAMA_API int32_t llama_model_n_embd_inp (const struct llama_model * model);
475
490
  LLAMA_API int32_t llama_model_n_layer (const struct llama_model * model);
476
491
  LLAMA_API int32_t llama_model_n_head (const struct llama_model * model);
477
492
  LLAMA_API int32_t llama_model_n_head_kv (const struct llama_model * model);
@@ -535,6 +550,9 @@ extern "C" {
535
550
  // Returns true if the model is recurrent (like Mamba, RWKV, etc.)
536
551
  LLAMA_API bool llama_model_is_recurrent(const struct llama_model * model);
537
552
 
553
+ // Returns true if the model is hybrid (like Jamba, Granite, etc.)
554
+ LLAMA_API bool llama_model_is_hybrid(const struct llama_model * model);
555
+
538
556
  // Returns true if the model is diffusion-based (like LLaDA, Dream, etc.)
539
557
  LLAMA_API bool llama_model_is_diffusion(const struct llama_model * model);
540
558
 
@@ -572,9 +590,13 @@ extern "C" {
572
590
  LLAMA_API int32_t llama_adapter_meta_val_str_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);
573
591
 
574
592
  // Manually free a LoRA adapter
575
- // Note: loaded adapters will be free when the associated model is deleted
593
+ // NOTE: loaded adapters will be free when the associated model is deleted
576
594
  LLAMA_API void llama_adapter_lora_free(struct llama_adapter_lora * adapter);
577
595
 
596
+ // Get the invocation tokens if the current lora is an alora
597
+ LLAMA_API uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter);
598
+ LLAMA_API const llama_token * llama_adapter_get_alora_invocation_tokens (const struct llama_adapter_lora * adapter);
599
+
578
600
  // The following functions operate on a llama_context, hence the naming: llama_verb_...
579
601
 
580
602
  // Add a loaded LoRA adapter to given context
@@ -779,8 +801,12 @@ extern "C" {
779
801
  size_t n_token_capacity,
780
802
  size_t * n_token_count_out);
781
803
 
804
+ // for backwards-compat
782
805
  #define LLAMA_STATE_SEQ_FLAGS_SWA_ONLY 1
783
806
 
807
+ // work only with partial states, such as SWA KV cache or recurrent cache (e.g. Mamba)
808
+ #define LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY 1
809
+
784
810
  typedef uint32_t llama_state_seq_flags;
785
811
 
786
812
  LLAMA_API size_t llama_state_seq_get_size_ext(
@@ -1090,8 +1116,6 @@ extern "C" {
1090
1116
  // // sample from the logits of the last token in the batch
1091
1117
  // const llama_token id = llama_sampler_sample(smpl, ctx, -1);
1092
1118
  //
1093
- // // accepting the token updates the internal state of certain samplers (e.g. grammar, repetition, etc.)
1094
- // llama_sampler_accept(smpl, id);
1095
1119
  // ...
1096
1120
  // }
1097
1121
  //
@@ -1148,11 +1172,6 @@ extern "C" {
1148
1172
  LLAMA_API struct llama_sampler * llama_sampler_init_greedy(void);
1149
1173
  LLAMA_API struct llama_sampler * llama_sampler_init_dist (uint32_t seed);
1150
1174
 
1151
- /// @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
1152
- /// NOTE: Avoid using on the full vocabulary as the sorting can become slow. For example, apply top-k or top-p sampling first.
1153
- DEPRECATED(LLAMA_API struct llama_sampler * llama_sampler_init_softmax (void),
1154
- "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)");
1155
-
1156
1175
  /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
1157
1176
  /// Setting k <= 0 makes this a noop
1158
1177
  LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);
@@ -1322,24 +1341,25 @@ extern "C" {
1322
1341
  //
1323
1342
  // Performance utils
1324
1343
  //
1325
- // NOTE: Used by llama.cpp examples, avoid using in third-party apps. Instead, do your own performance measurements.
1344
+ // NOTE: Used by llama.cpp examples/tools, avoid using in third-party apps. Instead, do your own performance measurements.
1326
1345
  //
1327
1346
 
1328
1347
  struct llama_perf_context_data {
1329
- double t_start_ms;
1330
- double t_load_ms;
1331
- double t_p_eval_ms;
1332
- double t_eval_ms;
1333
-
1334
- int32_t n_p_eval;
1335
- int32_t n_eval;
1336
- int32_t n_reused; // number of times a ggml compute graph had been reused
1348
+ // ms == milliseconds
1349
+ double t_start_ms; // absolute start time
1350
+ double t_load_ms; // time needed for loading the model
1351
+ double t_p_eval_ms; // time needed for processing the prompt
1352
+ double t_eval_ms; // time needed for generating tokens
1353
+
1354
+ int32_t n_p_eval; // number of prompt tokens
1355
+ int32_t n_eval; // number of generated tokens
1356
+ int32_t n_reused; // number of times a ggml compute graph had been reused
1337
1357
  };
1338
1358
 
1339
1359
  struct llama_perf_sampler_data {
1340
- double t_sample_ms;
1360
+ double t_sample_ms; // time needed for sampling in ms
1341
1361
 
1342
- int32_t n_sample;
1362
+ int32_t n_sample; // number of sampled tokens
1343
1363
  };
1344
1364
 
1345
1365
  LLAMA_API struct llama_perf_context_data llama_perf_context (const struct llama_context * ctx);
@@ -1351,6 +1371,9 @@ extern "C" {
1351
1371
  LLAMA_API void llama_perf_sampler_print(const struct llama_sampler * chain);
1352
1372
  LLAMA_API void llama_perf_sampler_reset( struct llama_sampler * chain);
1353
1373
 
1374
+ // print a breakdown of per-device memory use via LLAMA_LOG:
1375
+ LLAMA_API void llama_memory_breakdown_print(const struct llama_context * ctx);
1376
+
1354
1377
  //
1355
1378
  // training
1356
1379
  //
@@ -0,0 +1,77 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <svg
3
+ id="Layer_1"
4
+ version="1.1"
5
+ viewBox="0 0 250 250"
6
+ sodipodi:docname="llama1-icon-transparent.svg"
7
+ width="250"
8
+ height="250"
9
+ inkscape:version="1.4.2 (ebf0e940d0, 2025-05-08)"
10
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
11
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
12
+ xmlns="http://www.w3.org/2000/svg"
13
+ xmlns:svg="http://www.w3.org/2000/svg">
14
+ <sodipodi:namedview
15
+ id="namedview7"
16
+ pagecolor="#505050"
17
+ bordercolor="#ffffff"
18
+ borderopacity="1"
19
+ inkscape:showpageshadow="0"
20
+ inkscape:pageopacity="0"
21
+ inkscape:pagecheckerboard="1"
22
+ inkscape:deskcolor="#505050"
23
+ inkscape:zoom="2.48"
24
+ inkscape:cx="49.596774"
25
+ inkscape:cy="189.91935"
26
+ inkscape:window-width="3440"
27
+ inkscape:window-height="1440"
28
+ inkscape:window-x="0"
29
+ inkscape:window-y="0"
30
+ inkscape:window-maximized="1"
31
+ inkscape:current-layer="Layer_1" />
32
+ <!-- Generator: Adobe Illustrator 29.3.1, SVG Export Plug-In . SVG Version: 2.1.0 Build 151) -->
33
+ <defs
34
+ id="defs1">
35
+ <style
36
+ id="style1">
37
+ .st0 {
38
+ fill: #ff8236;
39
+ }
40
+
41
+ .st1 {
42
+ fill: #fff;
43
+ }
44
+
45
+ .st2 {
46
+ fill: #1b1f20;
47
+ }
48
+ </style>
49
+ </defs>
50
+ <g
51
+ id="g7">
52
+ <g
53
+ id="g6"
54
+ transform="translate(-995.51066,-129.70875)">
55
+ <path
56
+ class="st0"
57
+ d="m 1163.3,226.8 -13.5,24 c -17.8,-13.7 -44.2,-15.7 -62,-1 -28.7,23.7 -26.7,78.5 18,78.8 12.5,0 23.1,-5.9 34.5,-9.8 l 6,23.9 c -10.1,4.7 -20.4,9.5 -31.5,11 -101.2,13.8 -95.4,-132.3 -3.9,-139.9 19.2,-1.6 36.1,3.4 52.5,13 z"
58
+ id="path4" />
59
+ <path
60
+ class="st0"
61
+ d="m 1093.4,203.8 c -15.4,4.6 -29.7,13.1 -40.5,25 -2,-24.2 3.4,-73.1 30.3,-82.7 4,-1.4 17.7,-4.9 17.3,2.2 -0.4,7.1 -9.9,19.3 -12.2,25.9 -4,11.6 -0.3,19.6 5.2,29.7 z"
62
+ id="path5" />
63
+ <polygon
64
+ class="st0"
65
+ points="1131.4,307.8 1116.4,307.8 1116.4,290.8 1099.4,290.8 1099.4,276.8 1114.9,276.8 1116.4,275.3 1116.4,258.8 1131.4,258.8 1131.4,276.8 1147.4,276.8 1147.4,290.8 1131.4,290.8 "
66
+ id="polygon5" />
67
+ <polygon
68
+ class="st0"
69
+ points="1186.4,290.8 1186.4,307.8 1171.4,307.8 1171.4,290.8 1155.4,290.8 1155.4,276.8 1171.4,276.8 1171.4,258.8 1186.4,258.8 1186.4,275.3 1187.9,276.8 1203.4,276.8 1203.4,290.8 "
70
+ id="polygon6" />
71
+ <path
72
+ class="st0"
73
+ d="m 1142.3,156.9 c 2,3 -9.3,15.9 -11.1,19.2 -5.2,9.8 -1.7,15.4 2.2,24.7 -11.3,-1.7 -21.8,-0.3 -33,1 2.5,-21.5 14.6,-52.8 41.9,-44.9 z"
74
+ id="path6" />
75
+ </g>
76
+ </g>
77
+ </svg>
@@ -0,0 +1,87 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <svg
3
+ id="Layer_1"
4
+ version="1.1"
5
+ viewBox="0 0 250 250"
6
+ sodipodi:docname="llama-icon.svg"
7
+ width="250"
8
+ height="250"
9
+ inkscape:version="1.4.2 (ebf0e940d0, 2025-05-08)"
10
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
11
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
12
+ xmlns="http://www.w3.org/2000/svg"
13
+ xmlns:svg="http://www.w3.org/2000/svg">
14
+ <sodipodi:namedview
15
+ id="namedview7"
16
+ pagecolor="#505050"
17
+ bordercolor="#ffffff"
18
+ borderopacity="1"
19
+ inkscape:showpageshadow="0"
20
+ inkscape:pageopacity="0"
21
+ inkscape:pagecheckerboard="1"
22
+ inkscape:deskcolor="#505050"
23
+ inkscape:zoom="2.48"
24
+ inkscape:cx="146.57258"
25
+ inkscape:cy="189.91936"
26
+ inkscape:window-width="3440"
27
+ inkscape:window-height="1440"
28
+ inkscape:window-x="0"
29
+ inkscape:window-y="0"
30
+ inkscape:window-maximized="1"
31
+ inkscape:current-layer="g7" />
32
+ <!-- Generator: Adobe Illustrator 29.3.1, SVG Export Plug-In . SVG Version: 2.1.0 Build 151) -->
33
+ <defs
34
+ id="defs1">
35
+ <style
36
+ id="style1">
37
+ .st0 {
38
+ fill: #ff8236;
39
+ }
40
+
41
+ .st1 {
42
+ fill: #fff;
43
+ }
44
+
45
+ .st2 {
46
+ fill: #1b1f20;
47
+ }
48
+ </style>
49
+ </defs>
50
+ <rect
51
+ class="st2"
52
+ width="250"
53
+ height="250"
54
+ rx="8.6857386"
55
+ ry="8.7008333"
56
+ id="rect1"
57
+ x="0"
58
+ y="0"
59
+ style="stroke-width:0.266071" />
60
+ <g
61
+ id="g7">
62
+ <g
63
+ id="g6"
64
+ transform="translate(-995.51066,-129.70875)">
65
+ <path
66
+ class="st0"
67
+ d="m 1163.3,226.8 -13.5,24 c -17.8,-13.7 -44.2,-15.7 -62,-1 -28.7,23.7 -26.7,78.5 18,78.8 12.5,0 23.1,-5.9 34.5,-9.8 l 6,23.9 c -10.1,4.7 -20.4,9.5 -31.5,11 -101.2,13.8 -95.4,-132.3 -3.9,-139.9 19.2,-1.6 36.1,3.4 52.5,13 z"
68
+ id="path4" />
69
+ <path
70
+ class="st0"
71
+ d="m 1093.4,203.8 c -15.4,4.6 -29.7,13.1 -40.5,25 -2,-24.2 3.4,-73.1 30.3,-82.7 4,-1.4 17.7,-4.9 17.3,2.2 -0.4,7.1 -9.9,19.3 -12.2,25.9 -4,11.6 -0.3,19.6 5.2,29.7 z"
72
+ id="path5" />
73
+ <polygon
74
+ class="st0"
75
+ points="1131.4,307.8 1116.4,307.8 1116.4,290.8 1099.4,290.8 1099.4,276.8 1114.9,276.8 1116.4,275.3 1116.4,258.8 1131.4,258.8 1131.4,276.8 1147.4,276.8 1147.4,290.8 1131.4,290.8 "
76
+ id="polygon5" />
77
+ <polygon
78
+ class="st0"
79
+ points="1186.4,290.8 1186.4,307.8 1171.4,307.8 1171.4,290.8 1155.4,290.8 1155.4,276.8 1171.4,276.8 1171.4,258.8 1186.4,258.8 1186.4,275.3 1187.9,276.8 1203.4,276.8 1203.4,290.8 "
80
+ id="polygon6" />
81
+ <path
82
+ class="st0"
83
+ d="m 1142.3,156.9 c 2,3 -9.3,15.9 -11.1,19.2 -5.2,9.8 -1.7,15.4 2.2,24.7 -11.3,-1.7 -21.8,-0.3 -33,1 2.5,-21.5 14.6,-52.8 41.9,-44.9 z"
84
+ id="path6" />
85
+ </g>
86
+ </g>
87
+ </svg>
@@ -14,3 +14,5 @@
14
14
  -r ./requirements-tool_bench.txt
15
15
 
16
16
  -r ./requirements-gguf_editor_gui.txt
17
+
18
+ -r ../examples/model-conversion/requirements.txt
@@ -1,8 +1,8 @@
1
- mistral-common>=1.8.3
2
-
3
1
  -r ./requirements-convert_legacy_llama.txt
4
2
  --extra-index-url https://download.pytorch.org/whl/cpu
5
- torch~=2.4.0; platform_machine != "s390x"
3
+
4
+ ## Embedding Gemma requires PyTorch 2.6.0 or later
5
+ torch~=2.6.0; platform_machine != "s390x"
6
6
 
7
7
  # torch s390x packages can only be found from nightly builds
8
8
  --extra-index-url https://download.pytorch.org/whl/nightly
@@ -1,5 +1,7 @@
1
1
  numpy~=1.26.4
2
2
  sentencepiece~=0.2.0
3
- transformers>=4.45.1,<5.0.0
3
+
4
+ transformers>=4.57.1,<5.0.0
5
+
4
6
  gguf>=0.1.0
5
7
  protobuf>=4.21.0,<5.0.0
@@ -1,6 +1,6 @@
1
1
  aiohttp~=3.9.3
2
2
  pytest~=8.3.3
3
- huggingface_hub~=0.23.2
3
+ huggingface_hub>=0.34.0,<1.0
4
4
  matplotlib~=3.10.0
5
5
  numpy~=1.26.4
6
6
  openai~=1.55.3
@@ -35,8 +35,109 @@ add_library(llama
35
35
  unicode-data.cpp
36
36
  unicode.cpp
37
37
  unicode.h
38
+ models/afmoe.cpp
39
+ models/apertus.cpp
40
+ models/arcee.cpp
41
+ models/arctic.cpp
42
+ models/arwkv7.cpp
43
+ models/baichuan.cpp
44
+ models/bailingmoe.cpp
45
+ models/bailingmoe2.cpp
46
+ models/bert.cpp
47
+ models/bitnet.cpp
48
+ models/bloom.cpp
49
+ models/chameleon.cpp
50
+ models/chatglm.cpp
51
+ models/codeshell.cpp
52
+ models/cogvlm.cpp
53
+ models/cohere2-iswa.cpp
54
+ models/command-r.cpp
55
+ models/dbrx.cpp
56
+ models/deci.cpp
57
+ models/deepseek.cpp
58
+ models/deepseek2.cpp
59
+ models/dots1.cpp
60
+ models/dream.cpp
61
+ models/ernie4-5-moe.cpp
62
+ models/ernie4-5.cpp
63
+ models/exaone.cpp
64
+ models/exaone4.cpp
65
+ models/falcon-h1.cpp
66
+ models/falcon.cpp
67
+ models/gemma-embedding.cpp
68
+ models/gemma.cpp
69
+ models/gemma2-iswa.cpp
70
+ models/gemma3-iswa.cpp
71
+ models/gemma3n-iswa.cpp
72
+ models/glm4-moe.cpp
73
+ models/glm4.cpp
74
+ models/gpt2.cpp
75
+ models/gptneox.cpp
76
+ models/granite-hybrid.cpp
77
+ models/granite.cpp
78
+ models/grok.cpp
79
+ models/grovemoe.cpp
80
+ models/hunyuan-dense.cpp
81
+ models/hunyuan-moe.cpp
82
+ models/internlm2.cpp
83
+ models/jais.cpp
84
+ models/jamba.cpp
85
+ models/lfm2.cpp
86
+ models/llada-moe.cpp
87
+ models/llada.cpp
88
+ models/llama-iswa.cpp
89
+ models/llama.cpp
90
+ models/mamba.cpp
91
+ models/minicpm3.cpp
92
+ models/minimax-m2.cpp
93
+ models/mpt.cpp
94
+ models/nemotron-h.cpp
95
+ models/nemotron.cpp
96
+ models/neo-bert.cpp
97
+ models/olmo.cpp
98
+ models/olmo2.cpp
99
+ models/olmoe.cpp
100
+ models/openai-moe-iswa.cpp
101
+ models/openelm.cpp
102
+ models/orion.cpp
103
+ models/pangu-embedded.cpp
104
+ models/phi2.cpp
105
+ models/phi3.cpp
106
+ models/plamo.cpp
107
+ models/plamo2.cpp
108
+ models/plm.cpp
109
+ models/qwen.cpp
110
+ models/qwen2.cpp
111
+ models/qwen2moe.cpp
112
+ models/qwen2vl.cpp
113
+ models/qwen3.cpp
114
+ models/qwen3vl.cpp
115
+ models/qwen3vl-moe.cpp
116
+ models/qwen3moe.cpp
117
+ models/refact.cpp
118
+ models/rwkv6-base.cpp
119
+ models/rwkv6.cpp
120
+ models/rwkv6qwen2.cpp
121
+ models/rwkv7-base.cpp
122
+ models/rwkv7.cpp
123
+ models/seed-oss.cpp
124
+ models/smallthinker.cpp
125
+ models/smollm3.cpp
126
+ models/stablelm.cpp
127
+ models/starcoder.cpp
128
+ models/starcoder2.cpp
129
+ models/t5-dec.cpp
130
+ models/t5-enc.cpp
131
+ models/wavtokenizer-dec.cpp
132
+ models/xverse.cpp
133
+ models/graph-context-mamba.cpp
38
134
  )
39
135
 
136
+ set_target_properties(llama PROPERTIES
137
+ VERSION ${LLAMA_INSTALL_VERSION}
138
+ SOVERSION 0
139
+ )
140
+
40
141
  target_include_directories(llama PRIVATE .)
41
142
  target_include_directories(llama PUBLIC ../include)
42
143
  target_compile_features (llama PRIVATE cxx_std_17) # don't bump
@@ -6,6 +6,7 @@
6
6
 
7
7
  #include <map>
8
8
  #include <cassert>
9
+ #include <sstream>
9
10
  #include <stdexcept>
10
11
 
11
12
  // vec
@@ -215,6 +216,26 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
215
216
  }
216
217
 
217
218
  adapter.alpha = get_kv_f32(llm_kv(LLM_KV_ADAPTER_LORA_ALPHA));
219
+
220
+ // parse alora invocation sequence vector
221
+ const auto & key = llm_kv(LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS);
222
+ const int kid = gguf_find_key(ctx_gguf.get(), key.c_str());
223
+ if (kid >= 0) {
224
+ if (gguf_get_kv_type(ctx_gguf.get(), kid) != GGUF_TYPE_ARRAY) {
225
+ throw std::runtime_error("invalid gguf type for " + key);
226
+ }
227
+ const auto arr_type = gguf_get_arr_type(ctx_gguf.get(), kid);
228
+ if (arr_type != GGUF_TYPE_UINT32) {
229
+ throw std::runtime_error("invalid gguf element type for " + key);
230
+ }
231
+ const size_t seq_len = gguf_get_arr_n(ctx_gguf.get(), kid);
232
+ const void * data = gguf_get_arr_data(ctx_gguf.get(), kid);
233
+ adapter.alora_invocation_tokens.resize(seq_len);
234
+ std::copy(
235
+ (const llama_token *)data,
236
+ (const llama_token *)data + seq_len,
237
+ adapter.alora_invocation_tokens.begin());
238
+ }
218
239
  }
219
240
 
220
241
  int n_tensors = gguf_get_n_tensors(ctx_gguf.get());
@@ -450,3 +471,15 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
450
471
  void llama_adapter_lora_free(llama_adapter_lora * adapter) {
451
472
  delete adapter;
452
473
  }
474
+
475
+ uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter) {
476
+ if (!adapter) {
477
+ return 0;
478
+ }
479
+ return adapter->alora_invocation_tokens.size();
480
+ }
481
+
482
+ const llama_token * llama_adapter_get_alora_invocation_tokens(const llama_adapter_lora * adapter) {
483
+ GGML_ASSERT(adapter);
484
+ return adapter->alora_invocation_tokens.data();
485
+ }
@@ -70,6 +70,9 @@ struct llama_adapter_lora {
70
70
  // gguf metadata
71
71
  std::unordered_map<std::string, std::string> gguf_kv;
72
72
 
73
+ // activated lora (aLoRA)
74
+ std::vector<llama_token> alora_invocation_tokens;
75
+
73
76
  llama_adapter_lora() = default;
74
77
  ~llama_adapter_lora() = default;
75
78