@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
@@ -12,12 +12,30 @@ if (CUDAToolkit_FOUND)
12
12
  # 61 == Pascal, __dp4a instruction (per-byte integer dot product)
13
13
  # 70 == V100, FP16 tensor cores
14
14
  # 75 == Turing, int8 tensor cores
15
+ # 80 == Ampere, asynchronous data loading, faster tensor core instructions
16
+ # 86 == RTX 3000, needs CUDA v11.1
17
+ # 89 == RTX 4000, needs CUDA v11.8
18
+ #
19
+ # XX-virtual == compile CUDA code as PTX, do JIT compilation to binary code on first run
20
+ # XX-real == compile CUDA code as device code for this specific architecture
21
+ # no suffix == compile as both PTX and device code
22
+ #
23
+ # The default behavior for a non-native build is to build virtual architectures as needed to cover all features needed
24
+ # for best performance and to also build real architectures for the most commonly used GPUs.
15
25
  if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
16
26
  set(CMAKE_CUDA_ARCHITECTURES "native")
17
27
  elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
18
- set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75;80")
28
+ if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
29
+ set(CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real;89-real")
30
+ else()
31
+ set(CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real")
32
+ endif()
19
33
  else()
20
- set(CMAKE_CUDA_ARCHITECTURES "50;61;70;75;80")
34
+ if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
35
+ set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real;89-real")
36
+ else()
37
+ set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real")
38
+ endif()
21
39
  endif()
22
40
  endif()
23
41
  message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
@@ -100,7 +118,7 @@ if (CUDAToolkit_FOUND)
100
118
 
101
119
  set(CUDA_CXX_FLAGS "")
102
120
 
103
- set(CUDA_FLAGS -use_fast_math)
121
+ set(CUDA_FLAGS -use_fast_math -extended-lambda)
104
122
 
105
123
  if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
106
124
  # Options are:
@@ -133,6 +151,7 @@ if (CUDAToolkit_FOUND)
133
151
  COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
134
152
  OUTPUT_VARIABLE CUDA_CCVER
135
153
  ERROR_QUIET
154
+ OUTPUT_STRIP_TRAILING_WHITESPACE
136
155
  )
137
156
  else()
138
157
  if (CUDA_CCFULLVER MATCHES Apple)
@@ -143,7 +162,7 @@ if (CUDAToolkit_FOUND)
143
162
  string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER})
144
163
  endif()
145
164
 
146
- message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
165
+ message(STATUS "CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
147
166
 
148
167
  ggml_get_flags(${CUDA_CCID} ${CUDA_CCVER})
149
168
  list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
@@ -20,6 +20,7 @@
20
20
  #define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
21
21
  #define CUBLAS_TF32_TENSOR_OP_MATH 0
22
22
  #define CUDA_R_16F HIPBLAS_R_16F
23
+ #define CUDA_R_16BF HIPBLAS_R_16B
23
24
  #define CUDA_R_32F HIPBLAS_R_32F
24
25
  #define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED hipDeviceAttributeVirtualMemoryManagementSupported
25
26
  #define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED hipMemAllocationGranularityRecommended
@@ -70,6 +71,8 @@
70
71
  #define cudaLaunchHostFunc hipLaunchHostFunc
71
72
  #define cudaMalloc hipMalloc
72
73
  #define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
74
+ #define cudaMallocManaged hipMallocManaged
75
+ #define cudaMemAdvise hipMemAdvise
73
76
  #define cudaMemcpy hipMemcpy
74
77
  #define cudaMemcpyAsync hipMemcpyAsync
75
78
  #define cudaMemcpyPeerAsync hipMemcpyPeerAsync
@@ -151,6 +154,10 @@
151
154
  #define CDNA
152
155
  #endif
153
156
 
157
+ #if defined(__GFX12__)
158
+ #define RDNA4
159
+ #endif
160
+
154
161
  #if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) || \
155
162
  defined(__gfx1150__) || defined(__gfx1151__)
156
163
  #define RDNA3
@@ -15,6 +15,7 @@
15
15
  #define CUBLAS_STATUS_SUCCESS MUBLAS_STATUS_SUCCESS
16
16
  #define CUBLAS_TF32_TENSOR_OP_MATH MUBLAS_MATH_MODE_DEFAULT
17
17
  #define CUDA_R_16F MUSA_R_16F
18
+ #define CUDA_R_16BF MUSA_R_16BF
18
19
  #define CUDA_R_32F MUSA_R_32F
19
20
  #define cublasComputeType_t cudaDataType_t
20
21
  #define cublasCreate mublasCreate
@@ -89,10 +89,6 @@ endif()
89
89
 
90
90
  add_compile_definitions(GGML_USE_HIP)
91
91
 
92
- if (GGML_HIP_UMA)
93
- add_compile_definitions(GGML_HIP_UMA)
94
- endif()
95
-
96
92
  if (GGML_CUDA_FORCE_MMQ)
97
93
  add_compile_definitions(GGML_CUDA_FORCE_MMQ)
98
94
  endif()
@@ -148,8 +148,14 @@ struct ggml_map_custom2_op_params {
148
148
 
149
149
  struct ggml_map_custom3_op_params {
150
150
  ggml_custom3_op_t fun;
151
- int n_tasks;
152
- void * userdata;
151
+ int n_tasks;
152
+ void * userdata;
153
+ };
154
+
155
+ struct ggml_custom_op_params {
156
+ ggml_custom_op_t fun;
157
+ int n_tasks;
158
+ void * userdata;
153
159
  };
154
160
 
155
161
  // bitset
@@ -311,29 +317,28 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
311
317
 
312
318
  // FP16 to FP32 conversion
313
319
 
314
- #if defined(__ARM_NEON)
315
- #if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
316
- typedef uint16_t ggml_fp16_internal_t;
317
- #else
318
- typedef __fp16 ggml_fp16_internal_t;
319
- #endif
320
- #endif
321
-
322
- #if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
320
+ // 16-bit float
321
+ // on Arm, we use __fp16
322
+ // on x86, we use uint16_t
323
+ //
324
+ // for old CUDA compilers (<= 11), we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/10616
325
+ // for MUSA compilers, we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843
326
+ //
327
+ #if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
323
328
  #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
324
329
  #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
325
330
 
326
331
  #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
327
332
 
328
333
  static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
329
- ggml_fp16_internal_t tmp;
334
+ __fp16 tmp;
330
335
  memcpy(&tmp, &h, sizeof(ggml_fp16_t));
331
336
  return (float)tmp;
332
337
  }
333
338
 
334
339
  static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
335
340
  ggml_fp16_t res;
336
- ggml_fp16_internal_t tmp = f;
341
+ __fp16 tmp = f;
337
342
  memcpy(&res, &tmp, sizeof(ggml_fp16_t));
338
343
  return res;
339
344
  }
@@ -357,8 +362,8 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
357
362
  #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
358
363
 
359
364
  static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
360
- register float f;
361
- register double d;
365
+ float f;
366
+ double d;
362
367
  __asm__(
363
368
  "mtfprd %0,%2\n"
364
369
  "xscvhpdp %0,%0\n"
@@ -370,8 +375,8 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
370
375
  }
371
376
 
372
377
  static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
373
- register double d;
374
- register ggml_fp16_t r;
378
+ double d;
379
+ ggml_fp16_t r;
375
380
  __asm__( /* xscvdphp can work on double or single precision */
376
381
  "xscvdphp %0,%2\n"
377
382
  "mffprd %1,%0\n" :
@@ -381,6 +386,35 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
381
386
  return r;
382
387
  }
383
388
 
389
+ #elif defined(__riscv) && defined(GGML_RV_ZFH)
390
+
391
+ static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
392
+ float f;
393
+ __asm__(
394
+ "fmv.h.x %[f], %[h]\n\t"
395
+ "fcvt.s.h %[f], %[f]"
396
+ : [f] "=&f" (f)
397
+ : [h] "r" (h)
398
+ );
399
+ return f;
400
+ }
401
+
402
+ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
403
+ ggml_fp16_t res;
404
+ __asm__(
405
+ "fcvt.h.s %[f], %[f]\n\t"
406
+ "fmv.x.h %[h], %[f]"
407
+ : [h] "=&r" (res)
408
+ : [f] "f" (f)
409
+ );
410
+ return res;
411
+ }
412
+
413
+ #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
414
+ #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
415
+ #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
416
+ #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
417
+
384
418
  #else
385
419
 
386
420
  // FP16 <-> FP32
@@ -456,7 +490,7 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
456
490
  #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
457
491
  #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
458
492
 
459
- #endif // defined(__ARM_NEON) && (!defined(__MSC_VER)
493
+ #endif // defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
460
494
 
461
495
  // precomputed f32 table for f16 (256 KB)
462
496
  // defined in ggml.c, initialized in ggml_init()
@@ -1,6 +1,70 @@
1
1
  #ifndef GGML_METAL_IMPL
2
2
  #define GGML_METAL_IMPL
3
3
 
4
+ // kernel parameters for mat-vec threadgroups
5
+ //
6
+ // N_R0: number of src0 rows to process per simdgroup
7
+ // N_SG: number of simdgroups per threadgroup
8
+ //
9
+ // TODO: for optimal performance, these values should become functions of the device and work size
10
+
11
+ #define N_R0_Q4_0 4
12
+ #define N_SG_Q4_0 2
13
+
14
+ #define N_R0_Q4_1 4
15
+ #define N_SG_Q4_1 2
16
+
17
+ #define N_R0_Q5_0 4
18
+ #define N_SG_Q5_0 2
19
+
20
+ #define N_R0_Q5_1 4
21
+ #define N_SG_Q5_1 2
22
+
23
+ #define N_R0_Q8_0 4
24
+ #define N_SG_Q8_0 2
25
+
26
+ #define N_R0_Q2_K 4
27
+ #define N_SG_Q2_K 2
28
+
29
+ #define N_R0_Q3_K 2
30
+ #define N_SG_Q3_K 2
31
+
32
+ #define N_R0_Q4_K 4
33
+ #define N_SG_Q4_K 2
34
+
35
+ #define N_R0_Q5_K 2
36
+ #define N_SG_Q5_K 2
37
+
38
+ #define N_R0_Q6_K 1
39
+ #define N_SG_Q6_K 2
40
+
41
+ #define N_R0_IQ1_S 4
42
+ #define N_SG_IQ1_S 2
43
+
44
+ #define N_R0_IQ1_M 4
45
+ #define N_SG_IQ1_M 2
46
+
47
+ #define N_R0_IQ2_XXS 4
48
+ #define N_SG_IQ2_XXS 2
49
+
50
+ #define N_R0_IQ2_XS 4
51
+ #define N_SG_IQ2_XS 2
52
+
53
+ #define N_R0_IQ2_S 4
54
+ #define N_SG_IQ2_S 2
55
+
56
+ #define N_R0_IQ3_XXS 4
57
+ #define N_SG_IQ3_XXS 2
58
+
59
+ #define N_R0_IQ3_S 4
60
+ #define N_SG_IQ3_S 2
61
+
62
+ #define N_R0_IQ4_NL 2
63
+ #define N_SG_IQ4_NL 2
64
+
65
+ #define N_R0_IQ4_XS 2
66
+ #define N_SG_IQ4_XS 2
67
+
4
68
  // kernel argument structs
5
69
  //
6
70
  // - element counters (e.g. ne00) typically use int32_t to reduce register usage
@@ -143,6 +207,10 @@ typedef struct {
143
207
  float attn_factor;
144
208
  float beta_fast;
145
209
  float beta_slow;
210
+ int32_t sect_0;
211
+ int32_t sect_1;
212
+ int32_t sect_2;
213
+ int32_t sect_3;
146
214
  } ggml_metal_kargs_rope;
147
215
 
148
216
  typedef struct {
@@ -155,9 +223,12 @@ typedef struct {
155
223
  int32_t ne11;
156
224
  int32_t ne_12_2; // assume K and V are same shape
157
225
  int32_t ne_12_3;
158
- uint64_t nb_12_1;
159
- uint64_t nb_12_2;
160
- uint64_t nb_12_3;
226
+ uint64_t nb11;
227
+ uint64_t nb12;
228
+ uint64_t nb13;
229
+ uint64_t nb21;
230
+ uint64_t nb22;
231
+ uint64_t nb23;
161
232
  uint64_t nb31;
162
233
  int32_t ne1;
163
234
  int32_t ne2;
@@ -232,21 +303,42 @@ typedef struct {
232
303
  } ggml_metal_kargs_mul_mv_ext;
233
304
 
234
305
  typedef struct {
235
- int32_t nei0;
236
- int32_t nei1;
237
- uint64_t nbi1;
306
+ int32_t ne10;
307
+ int32_t ne11; // n_expert_used (bcast)
308
+ uint64_t nb11;
309
+ uint64_t nb12;
310
+ int32_t neh11; // n_tokens
311
+ uint64_t nbh11;
312
+ int32_t ne20; // n_expert_used
313
+ uint64_t nb21;
314
+ } ggml_metal_kargs_mul_mm_id_map0;
315
+
316
+ typedef struct {
317
+ int32_t ne20; // n_expert_used
318
+ int32_t neh0;
319
+ int32_t neh1;
320
+ uint64_t nbh1;
321
+ uint64_t nbh2;
322
+ int32_t ne0;
323
+ uint64_t nb1;
324
+ uint64_t nb2;
325
+ } ggml_metal_kargs_mul_mm_id_map1;
326
+
327
+ typedef struct {
238
328
  int32_t ne00;
239
329
  int32_t ne02;
240
330
  uint64_t nb01;
241
331
  uint64_t nb02;
242
- int32_t ne11;
243
- int32_t ne12;
244
- int32_t ne13;
245
- uint64_t nb10;
246
- uint64_t nb11;
247
- uint64_t nb12;
248
- int32_t ne0;
249
- int32_t ne1;
332
+ uint64_t nb03;
333
+ int32_t neh12;
334
+ uint64_t nbh10;
335
+ uint64_t nbh11;
336
+ uint64_t nbh12;
337
+ uint64_t nbh13;
338
+ int32_t neh0;
339
+ int32_t neh1;
340
+ int16_t r2;
341
+ int16_t r3;
250
342
  } ggml_metal_kargs_mul_mm_id;
251
343
 
252
344
  typedef struct {
@@ -25,124 +25,72 @@ endif ()
25
25
  if (GGML_OPENCL_EMBED_KERNELS)
26
26
  add_compile_definitions(GGML_OPENCL_EMBED_KERNELS)
27
27
 
28
- set(OPENCL_CL_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-opencl.cl.h")
29
- set(OPENCL_MM_CL_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-opencl_mm.cl.h")
30
- set(OPENCL_CVT_CL_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-opencl_cvt.cl.h")
28
+ set(EMBED_KERNEL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/kernels/embed_kernel.py")
29
+ file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")
31
30
 
32
- set(OPENCL_GEMV_NOSHUFFLE_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-opencl_gemv_noshuffle.cl.h")
33
- set(OPENCL_GEMV_NOSHUFFLE_GENERAL_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-opencl_gemv_noshuffle_general.cl.h")
34
- set(OPENCL_MUL_MAT_Ab_Bi_8x4_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-opencl_mul_mat_Ab_Bi_8x4.cl.h")
35
- set(OPENCL_TRANSPOSE_16_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-opencl_transpose_16.cl.h")
36
- set(OPENCL_TRANSPOSE_32_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-opencl_transpose_32.cl.h")
37
- set(OPENCL_TRANSPOSE_32_16_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-opencl_transpose_32_16.cl.h")
38
-
39
- set(EMBED_KERNEL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/kernels/embed_kernel.py")
40
- file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
41
-
42
- include_directories("${CMAKE_BINARY_DIR}/autogenerated")
43
-
44
- # Python must be accessible from command line
45
- add_custom_command(
46
- OUTPUT ${OPENCL_CL_SOURCE_EMBED}
47
- COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT}
48
- ${CMAKE_CURRENT_SOURCE_DIR}/kernels/ggml-opencl.cl
49
- ${OPENCL_CL_SOURCE_EMBED}
50
- DEPENDS kernels/ggml-opencl.cl ${EMBED_KERNEL_SCRIPT}
51
- COMMENT "Generate ggml-opencl.cl.h"
52
- )
53
-
54
- add_custom_command(
55
- OUTPUT ${OPENCL_MM_CL_SOURCE_EMBED}
56
- COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT}
57
- ${CMAKE_CURRENT_SOURCE_DIR}/kernels/ggml-opencl_mm.cl
58
- ${OPENCL_MM_CL_SOURCE_EMBED}
59
- DEPENDS kernels/ggml-opencl_mm.cl ${EMBED_KERNEL_SCRIPT}
60
- COMMENT "Generate ggml-opencl_mm.cl.h"
61
- )
62
-
63
- add_custom_command(
64
- OUTPUT ${OPENCL_CVT_CL_SOURCE_EMBED}
65
- COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT}
66
- ${CMAKE_CURRENT_SOURCE_DIR}/kernels/ggml-opencl_cvt.cl
67
- ${OPENCL_CVT_CL_SOURCE_EMBED}
68
- DEPENDS kernels/ggml-opencl_cvt.cl ${EMBED_KERNEL_SCRIPT}
69
- COMMENT "Generate ggml-opencl_cvt.cl.h"
70
- )
71
-
72
- add_custom_command(
73
- OUTPUT ${OPENCL_GEMV_NOSHUFFLE_SOURCE_EMBED}
74
- COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT}
75
- ${CMAKE_CURRENT_SOURCE_DIR}/kernels/ggml-opencl_gemv_noshuffle.cl
76
- ${OPENCL_GEMV_NOSHUFFLE_SOURCE_EMBED}
77
- DEPENDS kernels/ggml-opencl_gemv_noshuffle.cl ${EMBED_KERNEL_SCRIPT}
78
- COMMENT "Generate ggml-opencl_gemv_noshuffle.cl.h"
79
- )
80
-
81
- add_custom_command(
82
- OUTPUT ${OPENCL_GEMV_NOSHUFFLE_GENERAL_SOURCE_EMBED}
83
- COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT}
84
- ${CMAKE_CURRENT_SOURCE_DIR}/kernels/ggml-opencl_gemv_noshuffle_general.cl
85
- ${OPENCL_GEMV_NOSHUFFLE_GENERAL_SOURCE_EMBED}
86
- DEPENDS kernels/ggml-opencl_gemv_noshuffle_general.cl ${EMBED_KERNEL_SCRIPT}
87
- COMMENT "Generate ggml-opencl_gemv_noshuffle_general.cl.h"
88
- )
89
-
90
- add_custom_command(
91
- OUTPUT ${OPENCL_MUL_MAT_Ab_Bi_8x4_SOURCE_EMBED}
92
- COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT}
93
- ${CMAKE_CURRENT_SOURCE_DIR}/kernels/ggml-opencl_mul_mat_Ab_Bi_8x4.cl
94
- ${OPENCL_MUL_MAT_Ab_Bi_8x4_SOURCE_EMBED}
95
- DEPENDS kernels/ggml-opencl_mul_mat_Ab_Bi_8x4.cl ${EMBED_KERNEL_SCRIPT}
96
- COMMENT "Generate ggml-opencl_mul_mat_Ab_Bi_8x4.cl.cl.h"
97
- )
98
-
99
- add_custom_command(
100
- OUTPUT ${OPENCL_TRANSPOSE_16_SOURCE_EMBED}
101
- COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT}
102
- ${CMAKE_CURRENT_SOURCE_DIR}/kernels/ggml-opencl_transpose_16.cl
103
- ${OPENCL_TRANSPOSE_16_SOURCE_EMBED}
104
- DEPENDS kernels/ggml-opencl_transpose_16.cl ${EMBED_KERNEL_SCRIPT}
105
- COMMENT "Generate ggml-opencl_transpose_16.cl.h"
106
- )
107
-
108
- add_custom_command(
109
- OUTPUT ${OPENCL_TRANSPOSE_32_SOURCE_EMBED}
110
- COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT}
111
- ${CMAKE_CURRENT_SOURCE_DIR}/kernels/ggml-opencl_transpose_32.cl
112
- ${OPENCL_TRANSPOSE_32_SOURCE_EMBED}
113
- DEPENDS kernels/ggml-opencl_transpose_32.cl ${EMBED_KERNEL_SCRIPT}
114
- COMMENT "Generate ggml-opencl_transpose_32.cl.h"
115
- )
116
-
117
- add_custom_command(
118
- OUTPUT ${OPENCL_TRANSPOSE_32_16_SOURCE_EMBED}
119
- COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT}
120
- ${CMAKE_CURRENT_SOURCE_DIR}/kernels/ggml-opencl_transpose_32_16.cl
121
- ${OPENCL_TRANSPOSE_32_16_SOURCE_EMBED}
122
- DEPENDS kernels/ggml-opencl_transpose_32_16.cl ${EMBED_KERNEL_SCRIPT}
123
- COMMENT "Generate ggml-opencl_transpose_32_16.cl.h"
124
- )
125
-
126
- target_sources(${TARGET_NAME} PRIVATE
127
- ${OPENCL_CL_SOURCE_EMBED}
128
- ${OPENCL_MM_CL_SOURCE_EMBED}
129
- ${OPENCL_CVT_CL_SOURCE_EMBED}
130
- ${OPENCL_GEMV_NOSHUFFLE_SOURCE_EMBED}
131
- ${OPENCL_GEMV_NOSHUFFLE_GENERAL_SOURCE_EMBED}
132
- ${OPENCL_MUL_MAT_Ab_Bi_8x4_SOURCE_EMBED}
133
- ${OPENCL_TRANSPOSE_16_SOURCE_EMBED}
134
- ${OPENCL_TRANSPOSE_32_SOURCE_EMBED}
135
- ${OPENCL_TRANSPOSE_32_16_SOURCE_EMBED})
136
- else ()
137
- # copy ggml-opencl.cl to bin directory
138
- configure_file(kernels/ggml-opencl.cl ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-opencl.cl COPYONLY)
139
- configure_file(kernels/ggml-opencl_mm.cl ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-opencl_mm.cl COPYONLY)
140
- configure_file(kernels/ggml-opencl_cvt.cl ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-opencl_cvt.cl COPYONLY)
141
-
142
- configure_file(kernels/ggml-opencl_gemv_noshuffle.cl ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-opencl_gemv_noshuffle.cl COPYONLY)
143
- configure_file(kernels/ggml-opencl_gemv_noshuffle_general.cl ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-opencl_gemv_noshuffle_general.cl COPYONLY)
144
- configure_file(kernels/ggml-opencl_mul_mat_Ab_Bi_8x4.cl ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-opencl_mul_mat_Ab_Bi_8x4.cl COPYONLY)
145
- configure_file(kernels/ggml-opencl_transpose_16.cl ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-opencl_transpose_16.cl COPYONLY)
146
- configure_file(kernels/ggml-opencl_transpose_32.cl ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-opencl_transpose_32.cl COPYONLY)
147
- configure_file(kernels/ggml-opencl_transpose_32_16.cl ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-opencl_transpose_32_16.cl COPYONLY)
31
+ target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")
148
32
  endif ()
33
+
34
+ function(ggml_opencl_add_kernel KNAME)
35
+ set(KERN_HDR ${CMAKE_CURRENT_BINARY_DIR}/autogenerated/${KNAME}.cl.h)
36
+ set(KERN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/kernels/${KNAME}.cl)
37
+
38
+ if (GGML_OPENCL_EMBED_KERNELS)
39
+ message(STATUS "opencl: embedding kernel ${KNAME}")
40
+
41
+ # Python must be accessible from command line
42
+ add_custom_command(
43
+ OUTPUT ${KERN_HDR}
44
+ COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT} ${KERN_SRC} ${KERN_HDR}
45
+ DEPENDS ${KERN_SRC} ${EMBED_KERNEL_SCRIPT}
46
+ COMMENT "Generate ${KERN_HDR}"
47
+ )
48
+
49
+ target_sources(${TARGET_NAME} PRIVATE ${KERN_HDR})
50
+ else ()
51
+ message(STATUS "opencl: adding kernel ${KNAME}")
52
+ configure_file(${KERN_SRC} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${KNAME}.cl COPYONLY)
53
+ endif ()
54
+ endfunction()
55
+
56
+ set(GGML_OPENCL_KERNELS
57
+ add
58
+ clamp
59
+ cpy
60
+ cvt
61
+ diag_mask_inf
62
+ gelu
63
+ gemv_noshuffle_general
64
+ gemv_noshuffle
65
+ get_rows
66
+ im2col_f32
67
+ im2col_f16
68
+ mul_mat_Ab_Bi_8x4
69
+ mul_mv_f16_f16
70
+ mul_mv_f16_f32_1row
71
+ mul_mv_f16_f32_l4
72
+ mul_mv_f16_f32
73
+ mul_mv_f32_f32
74
+ mul_mv_q4_0_f32
75
+ mul_mv_q4_0_f32_v
76
+ mul_mv_q4_0_f32_8x_flat
77
+ mul_mv_q4_0_f32_1d_8x_flat
78
+ mul_mv_q4_0_f32_1d_16x_flat
79
+ mul_mv_q6_k
80
+ mul
81
+ norm
82
+ relu
83
+ rms_norm
84
+ rope
85
+ scale
86
+ silu
87
+ softmax_4_f32
88
+ softmax_4_f16
89
+ softmax_f32
90
+ softmax_f16
91
+ transpose
92
+ )
93
+
94
+ foreach (K ${GGML_OPENCL_KERNELS})
95
+ ggml_opencl_add_kernel(${K})
96
+ endforeach()