@fugood/llama.node 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. package/CMakeLists.txt +1 -8
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +4 -2
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +10 -10
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +14 -17
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +5 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +137 -29
  25. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  26. package/src/llama.cpp/.github/workflows/docker.yml +46 -34
  27. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  28. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  29. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  30. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  31. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  32. package/src/llama.cpp/CMakeLists.txt +26 -11
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/common/CMakeLists.txt +10 -10
  35. package/src/llama.cpp/common/arg.cpp +2041 -0
  36. package/src/llama.cpp/common/arg.h +77 -0
  37. package/src/llama.cpp/common/common.cpp +523 -1861
  38. package/src/llama.cpp/common/common.h +234 -106
  39. package/src/llama.cpp/common/console.cpp +3 -0
  40. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  41. package/src/llama.cpp/common/log.cpp +401 -0
  42. package/src/llama.cpp/common/log.h +66 -698
  43. package/src/llama.cpp/common/ngram-cache.cpp +39 -36
  44. package/src/llama.cpp/common/ngram-cache.h +19 -19
  45. package/src/llama.cpp/common/sampling.cpp +356 -350
  46. package/src/llama.cpp/common/sampling.h +62 -139
  47. package/src/llama.cpp/common/stb_image.h +5990 -6398
  48. package/src/llama.cpp/docs/build.md +72 -17
  49. package/src/llama.cpp/examples/CMakeLists.txt +1 -2
  50. package/src/llama.cpp/examples/batched/batched.cpp +49 -65
  51. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  53. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
  54. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
  56. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
  58. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  59. package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  61. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  62. package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
  63. package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
  64. package/src/llama.cpp/examples/infill/infill.cpp +131 -192
  65. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
  66. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  67. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
  68. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  69. package/src/llama.cpp/examples/llava/clip.cpp +686 -150
  70. package/src/llama.cpp/examples/llava/clip.h +11 -2
  71. package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
  72. package/src/llama.cpp/examples/llava/llava.cpp +146 -26
  73. package/src/llama.cpp/examples/llava/llava.h +2 -3
  74. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  75. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  76. package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
  77. package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
  78. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  79. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
  80. package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
  81. package/src/llama.cpp/examples/main/main.cpp +216 -313
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
  83. package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
  84. package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  87. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
  88. package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
  89. package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
  90. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
  91. package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
  92. package/src/llama.cpp/examples/server/server.cpp +1347 -1531
  93. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  94. package/src/llama.cpp/examples/server/utils.hpp +396 -107
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  96. package/src/llama.cpp/examples/simple/simple.cpp +132 -106
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  99. package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
  100. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  101. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  102. package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
  103. package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
  104. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  105. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  106. package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
  107. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  108. package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
  109. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  110. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  111. package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
  112. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  113. package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
  114. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  115. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  116. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  117. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  118. package/src/llama.cpp/ggml/include/ggml.h +272 -505
  119. package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
  120. package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
  121. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  122. package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
  123. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  124. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  125. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  126. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  127. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  128. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
  129. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  130. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
  131. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  132. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
  133. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  134. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  135. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  136. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  137. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  138. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
  139. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  140. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  141. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  142. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  143. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  151. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
  152. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  153. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  155. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  156. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  157. package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
  158. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  159. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
  160. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  161. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  162. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  163. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  164. package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
  165. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  167. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  169. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
  172. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  173. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  174. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  175. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  176. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  177. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  178. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  179. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
  180. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  181. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  182. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  183. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
  184. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
  187. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
  188. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  192. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  195. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  197. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  198. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  199. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  200. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
  201. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
  202. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
  203. package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
  204. package/src/llama.cpp/include/llama.h +296 -285
  205. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  206. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  207. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  208. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  209. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  210. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  211. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  212. package/src/llama.cpp/src/llama-grammar.h +120 -15
  213. package/src/llama.cpp/src/llama-impl.h +156 -1
  214. package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
  215. package/src/llama.cpp/src/llama-sampling.h +39 -47
  216. package/src/llama.cpp/src/llama-vocab.cpp +390 -127
  217. package/src/llama.cpp/src/llama-vocab.h +60 -20
  218. package/src/llama.cpp/src/llama.cpp +6215 -3263
  219. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  220. package/src/llama.cpp/src/unicode-data.h +4 -4
  221. package/src/llama.cpp/src/unicode.cpp +15 -7
  222. package/src/llama.cpp/tests/CMakeLists.txt +4 -2
  223. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  224. package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
  225. package/src/llama.cpp/tests/test-barrier.cpp +94 -0
  226. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  227. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  228. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  229. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
  230. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  231. package/src/llama.cpp/tests/test-log.cpp +39 -0
  232. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  233. package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
  234. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  235. package/src/llama.cpp/tests/test-rope.cpp +2 -1
  236. package/src/llama.cpp/tests/test-sampling.cpp +226 -142
  237. package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
  238. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  239. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  240. package/patches/llama.patch +0 -22
  241. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  242. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  243. package/src/llama.cpp/common/grammar-parser.h +0 -29
  244. package/src/llama.cpp/common/train.cpp +0 -1513
  245. package/src/llama.cpp/common/train.h +0 -233
  246. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
  247. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  248. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
  249. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
  250. package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
  251. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  252. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
@@ -9,13 +9,15 @@ extern "C" {
9
9
  #endif
10
10
 
11
11
  // backend API
12
- GGML_API GGML_CALL ggml_backend_t ggml_backend_blas_init(void);
12
+ GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
13
13
 
14
- GGML_API GGML_CALL bool ggml_backend_is_blas(ggml_backend_t backend);
14
+ GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
15
15
 
16
16
  // number of threads used for conversion to float
17
17
  // for openblas and blis, this will also set the number of threads used for blas operations
18
- GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
18
+ GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
19
+
20
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
19
21
 
20
22
 
21
23
  #ifdef __cplusplus
@@ -34,6 +34,8 @@ extern "C" {
34
34
  */
35
35
  #define GGML_CANN_MAX_DEVICES 16
36
36
 
37
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void);
38
+
37
39
  /**
38
40
  * @brief Initializes the CANN backend for a specified device.
39
41
  *
@@ -44,7 +46,7 @@ extern "C" {
44
46
  * @param device The index of the device to initialize.
45
47
  * @return A pointer to the initialized backend instance, or nullptr on failure.
46
48
  */
47
- GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
49
+ GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device);
48
50
 
49
51
  /**
50
52
  * @brief Checks if a given backend is a CANN backend.
@@ -55,7 +57,7 @@ GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
55
57
  * @param backend The backend instance to check.
56
58
  * @return True if the backend is a CANN backend, false otherwise.
57
59
  */
58
- GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
60
+ GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend);
59
61
 
60
62
  /**
61
63
  * @brief Retrieves the CANN buffer type for a specified device.
@@ -67,7 +69,7 @@ GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
67
69
  * @return A pointer to the buffer type interface for the specified device, or
68
70
  * nullptr if the device index is out of range.
69
71
  */
70
- GGML_API GGML_CALL ggml_backend_buffer_type_t
72
+ GGML_BACKEND_API ggml_backend_buffer_type_t
71
73
  ggml_backend_cann_buffer_type(int32_t device);
72
74
 
73
75
  /**
@@ -78,7 +80,14 @@ ggml_backend_cann_buffer_type(int32_t device);
78
80
  *
79
81
  * @return The number of CANN devices available.
80
82
  */
81
- GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
83
+ GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void);
84
+
85
+ /**
86
+ * @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
87
+ *
88
+ * @return A pointer to the host buffer type interface.
89
+ */
90
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
82
91
 
83
92
  /**
84
93
  * @brief Retrieves the description of a specific CANN device.
@@ -90,7 +99,7 @@ GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
90
99
  * @param description Pointer to a buffer where the description will be written.
91
100
  * @param description_size Size of the description buffer.
92
101
  */
93
- GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
102
+ GGML_BACKEND_API void ggml_backend_cann_get_device_description(
94
103
  int32_t device, char* description, size_t description_size);
95
104
 
96
105
  /**
@@ -105,20 +114,9 @@ GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
105
114
  * @param total Pointer to a variable where the total memory size will be
106
115
  * stored.
107
116
  */
108
- GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
109
- size_t* free,
110
- size_t* total);
111
-
112
- /**
113
- * @brief Set the logging callback for GGML.
114
- *
115
- * This function sets the logging callback and user data for logging.
116
- *
117
- * @param log_callback The logging callback to set.
118
- * @param user_data User data to pass to the logging callback.
119
- */
120
- GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
121
- void* user_data);
117
+ GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device,
118
+ size_t* free,
119
+ size_t* total);
122
120
 
123
121
  #ifdef __cplusplus
124
122
  }
@@ -0,0 +1,38 @@
1
+ #pragma once
2
+
3
+ #ifndef __cplusplus
4
+ #error "This header is for C++ only"
5
+ #endif
6
+
7
+ #include "ggml.h"
8
+ #include "ggml-alloc.h"
9
+ #include "ggml-backend.h"
10
+ #include <memory>
11
+
12
+ // Smart pointers for ggml types
13
+
14
+ // ggml
15
+
16
+ struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } };
17
+ struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } };
18
+
19
+ typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;
20
+ typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr;
21
+
22
+ // ggml-alloc
23
+
24
+ struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } };
25
+
26
+ typedef std::unique_ptr<ggml_gallocr_t, ggml_gallocr_deleter> ggml_gallocr_ptr;
27
+
28
+ // ggml-backend
29
+
30
+ struct ggml_backend_deleter { void operator()(ggml_backend_t backend) { ggml_backend_free(backend); } };
31
+ struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } };
32
+ struct ggml_backend_event_deleter { void operator()(ggml_backend_event_t event) { ggml_backend_event_free(event); } };
33
+ struct ggml_backend_sched_deleter { void operator()(ggml_backend_sched_t sched) { ggml_backend_sched_free(sched); } };
34
+
35
+ typedef std::unique_ptr<ggml_backend, ggml_backend_deleter> ggml_backend_ptr;
36
+ typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;
37
+ typedef std::unique_ptr<ggml_backend_event, ggml_backend_event_deleter> ggml_backend_event_ptr;
38
+ typedef std::unique_ptr<ggml_backend_sched, ggml_backend_sched_deleter> ggml_backend_sched_ptr;
@@ -0,0 +1,177 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ // Scheduling priorities
11
+ enum ggml_sched_priority {
12
+ GGML_SCHED_PRIO_NORMAL,
13
+ GGML_SCHED_PRIO_MEDIUM,
14
+ GGML_SCHED_PRIO_HIGH,
15
+ GGML_SCHED_PRIO_REALTIME
16
+ };
17
+
18
+ // Threadpool params
19
+ // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
20
+ struct ggml_threadpool_params {
21
+ bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
22
+ int n_threads; // number of threads
23
+ enum ggml_sched_priority prio; // thread priority
24
+ uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
25
+ bool strict_cpu; // strict cpu placement
26
+ bool paused; // start in paused state
27
+ };
28
+
29
+ struct ggml_threadpool; // forward declaration, see ggml.c
30
+
31
+ typedef struct ggml_threadpool * ggml_threadpool_t;
32
+
33
+ // the compute plan that needs to be prepared for ggml_graph_compute()
34
+ // since https://github.com/ggerganov/ggml/issues/287
35
+ struct ggml_cplan {
36
+ size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
37
+ uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
38
+
39
+ int n_threads;
40
+ struct ggml_threadpool * threadpool;
41
+
42
+ // abort ggml_graph_compute when true
43
+ ggml_abort_callback abort_callback;
44
+ void * abort_callback_data;
45
+ };
46
+
47
+ // numa strategies
48
+ enum ggml_numa_strategy {
49
+ GGML_NUMA_STRATEGY_DISABLED = 0,
50
+ GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
51
+ GGML_NUMA_STRATEGY_ISOLATE = 2,
52
+ GGML_NUMA_STRATEGY_NUMACTL = 3,
53
+ GGML_NUMA_STRATEGY_MIRROR = 4,
54
+ GGML_NUMA_STRATEGY_COUNT
55
+ };
56
+
57
+ GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
58
+ GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
59
+
60
+ GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
61
+ GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
62
+
63
+ GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
64
+ GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
65
+
66
+ GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
67
+ GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
68
+
69
+ GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
70
+ GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
71
+
72
+ GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
73
+ GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
74
+
75
+ GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
76
+ GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
77
+
78
+ GGML_BACKEND_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
79
+ GGML_BACKEND_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
80
+ GGML_BACKEND_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
81
+ GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
82
+ GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
83
+ GGML_BACKEND_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
84
+ GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
85
+ GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
86
+
87
+ // ggml_graph_plan() has to be called before ggml_graph_compute()
88
+ // when plan.work_size > 0, caller must allocate memory for plan.work_data
89
+ GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
90
+ const struct ggml_cgraph * cgraph,
91
+ int n_threads, /* = GGML_DEFAULT_N_THREADS */
92
+ struct ggml_threadpool * threadpool /* = NULL */ );
93
+ GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
94
+
95
+ // same as ggml_graph_compute() but the work data is allocated as a part of the context
96
+ // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
97
+ GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
98
+
99
+ //
100
+ // system info
101
+ //
102
+
103
+ // x86
104
+ GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
105
+ GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
106
+ GGML_BACKEND_API int ggml_cpu_has_avx (void);
107
+ GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
108
+ GGML_BACKEND_API int ggml_cpu_has_f16c (void);
109
+ GGML_BACKEND_API int ggml_cpu_has_fma (void);
110
+ GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
111
+ GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
112
+ GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
113
+ GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
114
+ GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
115
+ GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void);
116
+ // ARM
117
+ GGML_BACKEND_API int ggml_cpu_has_neon (void);
118
+ GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
119
+ GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
120
+ GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
121
+ GGML_BACKEND_API int ggml_cpu_has_sve (void);
122
+ GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
123
+ // other
124
+ GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
125
+ GGML_BACKEND_API int ggml_cpu_has_vsx (void);
126
+ GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
127
+ GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
128
+
129
+ // Internal types and functions exposed for tests and benchmarks
130
+
131
+ typedef void (*ggml_from_float_to_mat_t)
132
+ (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
133
+ typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
134
+ const void * GGML_RESTRICT y, size_t by, int nrc);
135
+ typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
136
+ const void * GGML_RESTRICT y, int nr, int nc);
137
+ typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
138
+ const void * GGML_RESTRICT y, int nr, int nc);
139
+
140
+ struct ggml_type_traits_cpu {
141
+ ggml_from_float_t from_float;
142
+ ggml_from_float_to_mat_t from_float_to_mat;
143
+ ggml_vec_dot_t vec_dot;
144
+ enum ggml_type vec_dot_type;
145
+ int64_t nrows; // number of rows to process simultaneously
146
+ int64_t ncols; // number of columns to process simultaneously
147
+ ggml_gemv_t gemv;
148
+ ggml_gemm_t gemm;
149
+ };
150
+
151
+ GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
152
+
153
+ GGML_BACKEND_API void ggml_cpu_init(void);
154
+
155
+ //
156
+ // CPU backend
157
+ //
158
+
159
+ GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
160
+
161
+ GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend);
162
+ GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
163
+ GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
164
+ GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
165
+
166
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
167
+
168
+ #ifdef GGML_USE_CPU_HBM
169
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
170
+ #endif
171
+
172
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
173
+ GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);
174
+
175
+ #ifdef __cplusplus
176
+ }
177
+ #endif
@@ -3,7 +3,11 @@
3
3
  #include "ggml.h"
4
4
  #include "ggml-backend.h"
5
5
 
6
- #ifdef GGML_USE_HIPBLAS
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ #ifdef GGML_USE_HIP
7
11
  #define GGML_CUDA_NAME "ROCm"
8
12
  #define GGML_CUBLAS_NAME "hipBLAS"
9
13
  #elif defined(GGML_USE_MUSA)
@@ -13,35 +17,31 @@
13
17
  #define GGML_CUDA_NAME "CUDA"
14
18
  #define GGML_CUBLAS_NAME "cuBLAS"
15
19
  #endif
16
-
17
- #ifdef __cplusplus
18
- extern "C" {
19
- #endif
20
-
21
20
  #define GGML_CUDA_MAX_DEVICES 16
22
21
 
23
22
  // backend API
24
- GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
23
+ GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device);
25
24
 
26
- GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
25
+ GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend);
27
26
 
28
27
  // device buffer
29
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
28
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
30
29
 
31
30
  // split tensor buffer that splits matrices by rows across multiple devices
32
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
31
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split);
33
32
 
34
33
  // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
35
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
34
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
35
+
36
+ GGML_BACKEND_API int ggml_backend_cuda_get_device_count(void);
37
+ GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
38
+ GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
36
39
 
37
- GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
38
- GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
39
- GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
40
+ GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
41
+ GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
40
42
 
41
- GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
42
- GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
43
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
43
44
 
44
- GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data);
45
45
  #ifdef __cplusplus
46
46
  }
47
47
  #endif
@@ -11,6 +11,8 @@
11
11
  extern "C" {
12
12
  #endif
13
13
 
14
+ #define GGML_KOMPUTE_MAX_DEVICES 16
15
+
14
16
  struct ggml_vk_device {
15
17
  int index;
16
18
  int type; // same as VkPhysicalDeviceType
@@ -35,11 +37,13 @@ struct ggml_vk_device ggml_vk_current_device(void);
35
37
  // forward declaration
36
38
  typedef struct ggml_backend * ggml_backend_t;
37
39
 
38
- GGML_API ggml_backend_t ggml_backend_kompute_init(int device);
40
+ GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
41
+
42
+ GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
39
43
 
40
- GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);
44
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
41
45
 
42
- GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
46
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
43
47
 
44
48
  #ifdef __cplusplus
45
49
  }
@@ -1,3 +1,5 @@
1
+ // Note: this description is outdated
2
+ //
1
3
  // An interface allowing to compute ggml_cgraph with Metal
2
4
  //
3
5
  // This is a fully functional interface that extends ggml with GPU support for Apple devices.
@@ -25,9 +27,6 @@
25
27
  #include <stddef.h>
26
28
  #include <stdbool.h>
27
29
 
28
- // max memory buffers that can be mapped to the device
29
- #define GGML_METAL_MAX_BUFFERS 64
30
-
31
30
  struct ggml_tensor;
32
31
  struct ggml_cgraph;
33
32
 
@@ -40,25 +39,27 @@ extern "C" {
40
39
  // user-code should use only these functions
41
40
  //
42
41
 
43
- GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
42
+ GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
44
43
 
45
- GGML_API ggml_backend_t ggml_backend_metal_init(void);
44
+ GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
46
45
 
47
- GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
46
+ GGML_DEPRECATED(
47
+ GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
48
+ "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
48
49
 
49
- GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
50
+ GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
50
51
 
51
- GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
52
-
53
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
52
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
54
53
 
55
54
  // helper to check if the device supports a specific family
56
55
  // ideally, the user code should be doing these checks
57
56
  // ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
58
- GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
57
+ GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
59
58
 
60
59
  // capture all command buffers committed the next time `ggml_backend_graph_compute` is called
61
- GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
60
+ GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
61
+
62
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void);
62
63
 
63
64
  #ifdef __cplusplus
64
65
  }