@fugood/llama.node 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187):
  1. package/CMakeLists.txt +1 -10
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +6 -4
  17. package/src/LlamaCompletionWorker.cpp +6 -6
  18. package/src/LlamaContext.cpp +7 -9
  19. package/src/common.hpp +2 -1
  20. package/src/llama.cpp/.github/workflows/build.yml +98 -24
  21. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  22. package/src/llama.cpp/.github/workflows/docker.yml +43 -34
  23. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  24. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  25. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  26. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  27. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  28. package/src/llama.cpp/CMakeLists.txt +20 -8
  29. package/src/llama.cpp/common/CMakeLists.txt +12 -10
  30. package/src/llama.cpp/common/arg.cpp +2006 -0
  31. package/src/llama.cpp/common/arg.h +77 -0
  32. package/src/llama.cpp/common/common.cpp +496 -1632
  33. package/src/llama.cpp/common/common.h +161 -63
  34. package/src/llama.cpp/common/console.cpp +3 -0
  35. package/src/llama.cpp/common/log.cpp +401 -0
  36. package/src/llama.cpp/common/log.h +66 -698
  37. package/src/llama.cpp/common/ngram-cache.cpp +3 -0
  38. package/src/llama.cpp/common/sampling.cpp +348 -350
  39. package/src/llama.cpp/common/sampling.h +62 -139
  40. package/src/llama.cpp/common/stb_image.h +5990 -6398
  41. package/src/llama.cpp/common/train.cpp +2 -0
  42. package/src/llama.cpp/docs/build.md +36 -1
  43. package/src/llama.cpp/examples/CMakeLists.txt +0 -1
  44. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
  45. package/src/llama.cpp/examples/batched/batched.cpp +39 -55
  46. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
  47. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  48. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
  49. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  50. package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
  51. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
  52. package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
  53. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  54. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
  55. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  57. package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
  58. package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
  59. package/src/llama.cpp/examples/infill/infill.cpp +117 -132
  60. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
  61. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
  62. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  63. package/src/llama.cpp/examples/llava/clip.cpp +685 -150
  64. package/src/llama.cpp/examples/llava/clip.h +11 -2
  65. package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
  66. package/src/llama.cpp/examples/llava/llava.cpp +110 -24
  67. package/src/llama.cpp/examples/llava/llava.h +2 -3
  68. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  69. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  70. package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
  71. package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
  72. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
  73. package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
  74. package/src/llama.cpp/examples/main/main.cpp +210 -262
  75. package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
  76. package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
  77. package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
  78. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  80. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
  81. package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
  82. package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
  83. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
  84. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
  85. package/src/llama.cpp/examples/server/server.cpp +1027 -1073
  86. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  87. package/src/llama.cpp/examples/server/utils.hpp +107 -105
  88. package/src/llama.cpp/examples/simple/simple.cpp +35 -41
  89. package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
  90. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  91. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  92. package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
  93. package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
  94. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  95. package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
  96. package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
  97. package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
  98. package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
  99. package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
  100. package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
  101. package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
  102. package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
  103. package/src/llama.cpp/ggml/include/ggml.h +293 -186
  104. package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
  105. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
  106. package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
  107. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
  108. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
  109. package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
  110. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  111. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  112. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  113. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  114. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  115. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  116. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  117. package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
  118. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
  120. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  121. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  122. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  123. package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
  124. package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
  125. package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
  126. package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
  127. package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
  128. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  129. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
  130. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
  131. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  132. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  133. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  134. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  135. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  136. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  137. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
  138. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  141. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
  142. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
  143. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
  144. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  145. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  146. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
  148. package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
  149. package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
  150. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
  151. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
  152. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
  153. package/src/llama.cpp/include/llama.h +241 -264
  154. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  155. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  156. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  157. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  158. package/src/llama.cpp/src/llama-grammar.h +120 -15
  159. package/src/llama.cpp/src/llama-impl.h +156 -1
  160. package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
  161. package/src/llama.cpp/src/llama-sampling.h +20 -47
  162. package/src/llama.cpp/src/llama-vocab.cpp +343 -120
  163. package/src/llama.cpp/src/llama-vocab.h +33 -17
  164. package/src/llama.cpp/src/llama.cpp +4247 -1525
  165. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  166. package/src/llama.cpp/src/unicode-data.h +4 -4
  167. package/src/llama.cpp/src/unicode.cpp +15 -7
  168. package/src/llama.cpp/tests/CMakeLists.txt +3 -0
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
  171. package/src/llama.cpp/tests/test-barrier.cpp +93 -0
  172. package/src/llama.cpp/tests/test-grad0.cpp +187 -70
  173. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  174. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  175. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
  176. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  177. package/src/llama.cpp/tests/test-log.cpp +39 -0
  178. package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
  179. package/src/llama.cpp/tests/test-rope.cpp +1 -1
  180. package/src/llama.cpp/tests/test-sampling.cpp +157 -98
  181. package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
  182. package/patches/llama.patch +0 -22
  183. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  184. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  185. package/src/llama.cpp/common/grammar-parser.h +0 -29
  186. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  187. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
@@ -56,6 +56,15 @@ else()
56
56
  set(GGML_NATIVE_DEFAULT ON)
57
57
  endif()
58
58
 
59
+ # defaults
60
+ if (NOT GGML_LLAMAFILE_DEFAULT)
61
+ set(GGML_LLAMAFILE_DEFAULT OFF)
62
+ endif()
63
+
64
+ if (NOT GGML_CUDA_GRAPHS_DEFAULT)
65
+ set(GGML_CUDA_GRAPHS_DEFAULT OFF)
66
+ endif()
67
+
59
68
  # general
60
69
  option(GGML_STATIC "ggml: static link libraries" OFF)
61
70
  option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
@@ -110,7 +119,7 @@ option(GGML_ACCELERATE "ggml: enable Accelerate framework"
110
119
  option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
111
120
  set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
112
121
  "ggml: BLAS library vendor")
113
- option(GGML_LLAMAFILE "ggml: use LLAMAFILE" OFF)
122
+ option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})
114
123
 
115
124
  option(GGML_CUDA "ggml: use CUDA" OFF)
116
125
  option(GGML_MUSA "ggml: use MUSA" OFF)
@@ -127,15 +136,16 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
127
136
  option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
128
137
  option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
129
138
  option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
130
- option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF)
139
+ option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
131
140
 
132
- option(GGML_CURL "ggml: use libcurl to download model from an URL" OFF)
133
141
  option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
134
142
  option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
135
143
  option(GGML_VULKAN "ggml: use Vulkan" OFF)
136
144
  option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
137
145
  option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
138
146
  option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
147
+ option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
148
+ option(GGML_VULKAN_PERF "ggml: enable Vulkan perf output" OFF)
139
149
  option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
140
150
  option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
141
151
  option(GGML_KOMPUTE "ggml: use Kompute" OFF)
@@ -207,6 +217,7 @@ set(GGML_PUBLIC_HEADERS
207
217
  include/ggml-alloc.h
208
218
  include/ggml-backend.h
209
219
  include/ggml-blas.h
220
+ include/ggml-cann.h
210
221
  include/ggml-cuda.h
211
222
  include/ggml.h
212
223
  include/ggml-kompute.h
@@ -7,8 +7,8 @@ extern "C" {
7
7
  #endif
8
8
 
9
9
  typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
10
- typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
11
- typedef struct ggml_backend * ggml_backend_t;
10
+ typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
11
+ typedef struct ggml_backend * ggml_backend_t;
12
12
 
13
13
  // Tensor allocator
14
14
  struct ggml_tallocr {
@@ -24,7 +24,7 @@ GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, st
24
24
  // Graph allocator
25
25
  /*
26
26
  Example usage:
27
- ggml_gallocr_t galloc = ggml_gallocr_new(ggml_bacckend_cpu_buffer_type());
27
+ ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
28
28
 
29
29
  // optional: create a worst-case graph and reserve the buffers to avoid reallocations
30
30
  ggml_gallocr_reserve(galloc, build_graph(max_batch));
@@ -12,43 +12,52 @@ extern "C" {
12
12
  typedef struct ggml_backend_event * ggml_backend_event_t;
13
13
  typedef struct ggml_backend * ggml_backend_t;
14
14
  typedef void * ggml_backend_graph_plan_t;
15
+ typedef struct ggml_backend_reg * ggml_backend_reg_t;
16
+ typedef struct ggml_backend_device * ggml_backend_dev_t;
17
+
15
18
 
16
19
  //
17
- // Backend buffer
20
+ // Backend buffer type
18
21
  //
19
22
 
20
- // buffer type
21
- GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
22
- GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
23
- GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
24
- GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
25
- GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
26
- GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
23
+ GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
24
+ GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
25
+ GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
26
+ GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
27
+ GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
28
+ GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
29
+ GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft);
30
+
31
+ //
32
+ // Backend buffer
33
+ //
27
34
 
28
- // buffer
29
35
  enum ggml_backend_buffer_usage {
30
36
  GGML_BACKEND_BUFFER_USAGE_ANY = 0,
31
37
  GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
32
38
  GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
33
39
  };
34
40
 
35
- GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
36
- GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
37
- GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
38
- GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
39
- GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
40
- GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
41
- GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
42
- GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
43
- GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
44
- GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
45
- GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
46
- GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer);
47
- GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
48
- GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
41
+ GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
42
+ GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
43
+ GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
44
+ GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
45
+ GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
46
+ GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
47
+ GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
48
+ GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
49
+ GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
50
+ GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
51
+ GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
52
+ GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer);
53
+ GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
54
+ GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
55
+
56
+ // tensor copy between different backends
57
+ GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
49
58
 
50
59
  //
51
- // Backend
60
+ // Backend (stream)
52
61
  //
53
62
 
54
63
  GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
@@ -63,8 +72,10 @@ extern "C" {
63
72
  GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
64
73
  GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
65
74
 
66
- GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
67
- GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
75
+ // "offset" refers to the offset of the tensor data for setting/getting data
76
+ GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
77
+ GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
78
+ GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
68
79
 
69
80
  GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
70
81
 
@@ -74,64 +85,118 @@ extern "C" {
74
85
  GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
75
86
  GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
76
87
  GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
88
+
89
+ // NOTE: will be removed, use device version instead
77
90
  GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
78
91
  GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
79
92
  GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
80
93
 
81
- // tensor copy between different backends
82
- GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
83
-
84
94
  // asynchronous copy
85
95
  // the copy is performed after all the currently queued operations in backend_src
86
96
  // backend_dst will wait for the copy to complete before performing other operations
87
97
  // automatic fallback to sync copy if async is not supported
88
98
  GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
89
99
 
90
- // events
91
- GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
92
- GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
93
- GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
94
- GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
95
- GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
100
+ GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend);
96
101
 
97
102
  //
98
- // CPU backend
103
+ // Events
99
104
  //
100
105
 
101
- GGML_API ggml_backend_t ggml_backend_cpu_init(void);
106
+ GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device);
107
+ GGML_API void ggml_backend_event_free(ggml_backend_event_t event);
108
+ GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend);
109
+ GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
110
+ GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event);
102
111
 
103
- GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
104
- GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
105
- GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
112
+ //
113
+ // Backend device
114
+ //
106
115
 
107
- // Create a backend buffer from an existing pointer
108
- GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
116
+ enum ggml_backend_dev_type {
117
+ GGML_BACKEND_DEVICE_TYPE_CPU,
118
+ GGML_BACKEND_DEVICE_TYPE_GPU,
119
+ // devices with full capabilities (excludes backends such as BLAS that only support matrix multiplication)
120
+ GGML_BACKEND_DEVICE_TYPE_CPU_FULL,
121
+ GGML_BACKEND_DEVICE_TYPE_GPU_FULL
122
+ };
109
123
 
110
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
124
+ // functionality supported by the device
125
+ struct ggml_backend_dev_caps {
126
+ // asynchronous operations
127
+ bool async;
128
+ // pinned host buffer
129
+ bool host_buffer;
130
+ // event synchronization
131
+ bool events;
132
+ };
111
133
 
112
- #ifdef GGML_USE_CPU_HBM
113
- GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
114
- #endif
134
+ // all the device properties
135
+ struct ggml_backend_dev_props {
136
+ const char * name;
137
+ const char * description;
138
+ size_t memory_free;
139
+ size_t memory_total;
140
+ enum ggml_backend_dev_type type;
141
+ struct ggml_backend_dev_caps caps;
142
+ };
143
+
144
+ GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
145
+ GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
146
+ GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
147
+ GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device);
148
+ GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props);
149
+ GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
150
+ GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
151
+ GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
152
+ GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
153
+ GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
154
+
155
+ GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
156
+ GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
157
+ GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
115
158
 
116
159
  //
117
- // Backend registry
160
+ // Backend (reg)
118
161
  //
119
162
 
120
- // The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
163
+ GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg);
164
+ GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
165
+ GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
166
+ GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
121
167
 
122
- GGML_API size_t ggml_backend_reg_get_count(void);
123
- GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
124
- GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is backend_name:params (params is optional)
125
- GGML_API const char * ggml_backend_reg_get_name(size_t i);
126
- GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
127
- GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
128
- GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
168
+
169
+ // Functions that may be obtained using ggml_backend_reg_get_proc_address
170
+ typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(const float *);
171
+
172
+ //
173
+ // Backend registry
174
+ //
175
+
176
+ // Backend (reg) enumeration
177
+ GGML_API size_t ggml_backend_reg_count(void);
178
+ GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
179
+ GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name);
180
+
181
+ // Device enumeration
182
+ GGML_API size_t ggml_backend_dev_count(void);
183
+ GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
184
+ GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
185
+ GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
186
+
187
+ // Direct backend (stream) initialization
188
+ // = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
189
+ GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
190
+ // = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
191
+ GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
192
+ // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU_FULL) OR ggml_backend_dev_by_type(CPU_FULL), NULL)
193
+ GGML_API ggml_backend_t ggml_backend_init_best(void);
129
194
 
130
195
  //
131
196
  // Backend scheduler
132
197
  //
133
198
 
134
- // The backend scheduler allows for multiple backends to be used together
199
+ // The backend scheduler allows for multiple backend devices to be used together
135
200
  // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
136
201
  // The backends are selected based on:
137
202
  // - the backend that supports the operation
@@ -166,9 +231,9 @@ extern "C" {
166
231
  }
167
232
  */
168
233
 
169
- struct ggml_backend_sched;
170
234
  typedef struct ggml_backend_sched * ggml_backend_sched_t;
171
235
 
236
+ // Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
172
237
  // when ask == true, the scheduler wants to know if the user wants to observe this node
173
238
  // this allows the scheduler to batch nodes together in order to evaluate them in a single call
174
239
  //
@@ -182,7 +247,7 @@ extern "C" {
182
247
  GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
183
248
 
184
249
  // Initialize backend buffers from a measure graph
185
- GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
250
+ GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success
186
251
 
187
252
  GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
188
253
  GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
@@ -197,7 +262,7 @@ extern "C" {
197
262
  GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
198
263
 
199
264
  // Allocate and compute graph on the backend scheduler
200
- GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
265
+ GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success
201
266
  GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
202
267
  GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
203
268
  GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
@@ -223,7 +288,7 @@ extern "C" {
223
288
  GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
224
289
  GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
225
290
 
226
- typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
291
+ typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
227
292
 
228
293
  // Compare the output of two backends
229
294
  GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
@@ -232,6 +297,26 @@ extern "C" {
232
297
  GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
233
298
  GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
234
299
 
300
+ //
301
+ // CPU backend
302
+ //
303
+
304
+ GGML_API ggml_backend_t ggml_backend_cpu_init(void);
305
+
306
+ GGML_API bool ggml_backend_is_cpu (ggml_backend_t backend);
307
+ GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
308
+ GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
309
+ GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
310
+
311
+ // Create a backend buffer from an existing pointer
312
+ GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
313
+ GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
314
+
315
+ GGML_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
316
+
317
+ #ifdef GGML_USE_CPU_HBM
318
+ GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
319
+ #endif
235
320
 
236
321
  #ifdef __cplusplus
237
322
  }
@@ -9,13 +9,13 @@ extern "C" {
9
9
  #endif
10
10
 
11
11
  // backend API
12
- GGML_API GGML_CALL ggml_backend_t ggml_backend_blas_init(void);
12
+ GGML_API ggml_backend_t ggml_backend_blas_init(void);
13
13
 
14
- GGML_API GGML_CALL bool ggml_backend_is_blas(ggml_backend_t backend);
14
+ GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
15
15
 
16
16
  // number of threads used for conversion to float
17
17
  // for openblas and blis, this will also set the number of threads used for blas operations
18
- GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
18
+ GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
19
19
 
20
20
 
21
21
  #ifdef __cplusplus
@@ -44,7 +44,7 @@ extern "C" {
44
44
  * @param device The index of the device to initialize.
45
45
  * @return A pointer to the initialized backend instance, or nullptr on failure.
46
46
  */
47
- GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
47
+ GGML_API ggml_backend_t ggml_backend_cann_init(int32_t device);
48
48
 
49
49
  /**
50
50
  * @brief Checks if a given backend is a CANN backend.
@@ -55,7 +55,7 @@ GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
55
55
  * @param backend The backend instance to check.
56
56
  * @return True if the backend is a CANN backend, false otherwise.
57
57
  */
58
- GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
58
+ GGML_API bool ggml_backend_is_cann(ggml_backend_t backend);
59
59
 
60
60
  /**
61
61
  * @brief Retrieves the CANN buffer type for a specified device.
@@ -67,7 +67,7 @@ GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
67
67
  * @return A pointer to the buffer type interface for the specified device, or
68
68
  * nullptr if the device index is out of range.
69
69
  */
70
- GGML_API GGML_CALL ggml_backend_buffer_type_t
70
+ GGML_API ggml_backend_buffer_type_t
71
71
  ggml_backend_cann_buffer_type(int32_t device);
72
72
 
73
73
  /**
@@ -78,7 +78,14 @@ ggml_backend_cann_buffer_type(int32_t device);
78
78
  *
79
79
  * @return The number of CANN devices available.
80
80
  */
81
- GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
81
+ GGML_API int32_t ggml_backend_cann_get_device_count(void);
82
+
83
+ /**
84
+ * @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
85
+ *
86
+ * @return A pointer to the host buffer type interface.
87
+ */
88
+ GGML_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
82
89
 
83
90
  /**
84
91
  * @brief Retrieves the description of a specific CANN device.
@@ -90,7 +97,7 @@ GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
90
97
  * @param description Pointer to a buffer where the description will be written.
91
98
  * @param description_size Size of the description buffer.
92
99
  */
93
- GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
100
+ GGML_API void ggml_backend_cann_get_device_description(
94
101
  int32_t device, char* description, size_t description_size);
95
102
 
96
103
  /**
@@ -105,20 +112,9 @@ GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
105
112
  * @param total Pointer to a variable where the total memory size will be
106
113
  * stored.
107
114
  */
108
- GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
109
- size_t* free,
110
- size_t* total);
111
-
112
- /**
113
- * @brief Set the logging callback for GGML.
114
- *
115
- * This function sets the logging callback and user data for logging.
116
- *
117
- * @param log_callback The logging callback to set.
118
- * @param user_data User data to pass to the logging callback.
119
- */
120
- GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
121
- void* user_data);
115
+ GGML_API void ggml_backend_cann_get_device_memory(int32_t device,
116
+ size_t* free,
117
+ size_t* total);
122
118
 
123
119
  #ifdef __cplusplus
124
120
  }
@@ -3,6 +3,10 @@
3
3
  #include "ggml.h"
4
4
  #include "ggml-backend.h"
5
5
 
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
6
10
  #ifdef GGML_USE_HIPBLAS
7
11
  #define GGML_CUDA_NAME "ROCm"
8
12
  #define GGML_CUBLAS_NAME "hipBLAS"
@@ -13,35 +17,31 @@
13
17
  #define GGML_CUDA_NAME "CUDA"
14
18
  #define GGML_CUBLAS_NAME "cuBLAS"
15
19
  #endif
16
-
17
- #ifdef __cplusplus
18
- extern "C" {
19
- #endif
20
-
21
20
  #define GGML_CUDA_MAX_DEVICES 16
22
21
 
23
22
  // backend API
24
- GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
23
+ GGML_API ggml_backend_t ggml_backend_cuda_init(int device);
25
24
 
26
- GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
25
+ GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
27
26
 
28
27
  // device buffer
29
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
28
+ GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
30
29
 
31
30
  // split tensor buffer that splits matrices by rows across multiple devices
32
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
31
+ GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
33
32
 
34
33
  // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
35
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
34
+ GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
35
+
36
+ GGML_API int ggml_backend_cuda_get_device_count(void);
37
+ GGML_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
38
+ GGML_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
36
39
 
37
- GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
38
- GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
39
- GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
40
+ GGML_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
41
+ GGML_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
40
42
 
41
- GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
42
- GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
43
+ GGML_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
43
44
 
44
- GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data);
45
45
  #ifdef __cplusplus
46
46
  }
47
47
  #endif
@@ -1,3 +1,5 @@
1
+ // Note: this description is outdated
2
+ //
1
3
  // An interface allowing to compute ggml_cgraph with Metal
2
4
  //
3
5
  // This is a fully functional interface that extends ggml with GPU support for Apple devices.
@@ -25,9 +27,6 @@
25
27
  #include <stddef.h>
26
28
  #include <stdbool.h>
27
29
 
28
- // max memory buffers that can be mapped to the device
29
- #define GGML_METAL_MAX_BUFFERS 64
30
-
31
30
  struct ggml_tensor;
32
31
  struct ggml_cgraph;
33
32
 
@@ -40,17 +39,15 @@ extern "C" {
40
39
  // user-code should use only these functions
41
40
  //
42
41
 
43
- GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
44
-
45
42
  GGML_API ggml_backend_t ggml_backend_metal_init(void);
46
43
 
47
44
  GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
48
45
 
49
- GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
46
+ GGML_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
50
47
 
51
- GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
48
+ GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
52
49
 
53
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
50
+ GGML_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
54
51
 
55
52
  // helper to check if the device supports a specific family
56
53
  // ideally, the user code should be doing these checks
@@ -10,14 +10,14 @@ extern "C" {
10
10
  #define GGML_RPC_MAX_SERVERS 16
11
11
 
12
12
  // backend API
13
- GGML_API GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
14
- GGML_API GGML_CALL bool ggml_backend_is_rpc(ggml_backend_t backend);
13
+ GGML_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
14
+ GGML_API bool ggml_backend_is_rpc(ggml_backend_t backend);
15
15
 
16
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
16
+ GGML_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
17
17
 
18
- GGML_API GGML_CALL void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
18
+ GGML_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
19
19
 
20
- GGML_API GGML_CALL void start_rpc_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
20
+ GGML_API void start_rpc_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
21
21
 
22
22
  #ifdef __cplusplus
23
23
  }
@@ -23,20 +23,20 @@ GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
23
23
  GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
24
24
 
25
25
  // split tensor buffer that splits matrices by rows across multiple devices
26
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
26
+ GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
27
27
 
28
28
  // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
29
29
  GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
30
30
 
31
- GGML_API void ggml_backend_sycl_print_sycl_devices(void);
32
- GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
33
- GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
34
- GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
35
- GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
31
+ GGML_API void ggml_backend_sycl_print_sycl_devices(void);
32
+ GGML_API void ggml_sycl_get_gpu_list(int *id_list, int max_len);
33
+ GGML_API void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
34
+ GGML_API int ggml_backend_sycl_get_device_count();
35
+ GGML_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
36
36
 
37
37
  // SYCL doesn't support registering host memory, keep here for reference
38
- // GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
39
- // GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
38
+ // GGML_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
39
+ // GGML_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
40
40
  #ifdef __cplusplus
41
41
  }
42
42
  #endif
@@ -13,16 +13,16 @@ extern "C" {
13
13
  GGML_API void ggml_vk_instance_init(void);
14
14
 
15
15
  // backend API
16
- GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num);
16
+ GGML_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);
17
17
 
18
- GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend);
19
- GGML_API GGML_CALL int ggml_backend_vk_get_device_count(void);
20
- GGML_API GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
21
- GGML_API GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
18
+ GGML_API bool ggml_backend_is_vk(ggml_backend_t backend);
19
+ GGML_API int ggml_backend_vk_get_device_count(void);
20
+ GGML_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
21
+ GGML_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
22
22
 
23
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
23
+ GGML_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
24
24
  // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
25
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
25
+ GGML_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
26
26
 
27
27
  #ifdef __cplusplus
28
28
  }