@fugood/llama.node 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/CMakeLists.txt +2 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +1 -1
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +8 -8
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +8 -9
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +4 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +43 -9
  25. package/src/llama.cpp/.github/workflows/docker.yml +3 -0
  26. package/src/llama.cpp/CMakeLists.txt +7 -4
  27. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  28. package/src/llama.cpp/common/CMakeLists.txt +0 -2
  29. package/src/llama.cpp/common/arg.cpp +642 -607
  30. package/src/llama.cpp/common/arg.h +22 -22
  31. package/src/llama.cpp/common/common.cpp +79 -281
  32. package/src/llama.cpp/common/common.h +130 -100
  33. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  34. package/src/llama.cpp/common/log.cpp +50 -50
  35. package/src/llama.cpp/common/log.h +18 -18
  36. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  37. package/src/llama.cpp/common/ngram-cache.h +19 -19
  38. package/src/llama.cpp/common/sampling.cpp +116 -108
  39. package/src/llama.cpp/common/sampling.h +20 -20
  40. package/src/llama.cpp/docs/build.md +37 -17
  41. package/src/llama.cpp/examples/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +14 -14
  43. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  46. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  47. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  48. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  49. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  50. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  51. package/src/llama.cpp/examples/imatrix/imatrix.cpp +20 -11
  52. package/src/llama.cpp/examples/infill/infill.cpp +40 -86
  53. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -151
  54. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  55. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  56. package/src/llama.cpp/examples/llava/clip.cpp +1 -0
  57. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  58. package/src/llama.cpp/examples/llava/llava.cpp +37 -3
  59. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  60. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  61. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  62. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  63. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +14 -14
  64. package/src/llama.cpp/examples/lookup/lookup.cpp +29 -29
  65. package/src/llama.cpp/examples/main/main.cpp +64 -109
  66. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  67. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  68. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  69. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  70. package/src/llama.cpp/examples/retrieval/retrieval.cpp +13 -13
  71. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  72. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +34 -17
  73. package/src/llama.cpp/examples/server/CMakeLists.txt +4 -13
  74. package/src/llama.cpp/examples/server/server.cpp +553 -691
  75. package/src/llama.cpp/examples/server/utils.hpp +312 -25
  76. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  77. package/src/llama.cpp/examples/simple/simple.cpp +128 -96
  78. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  79. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  80. package/src/llama.cpp/examples/speculative/speculative.cpp +54 -51
  81. package/src/llama.cpp/examples/tokenize/tokenize.cpp +2 -2
  82. package/src/llama.cpp/ggml/CMakeLists.txt +15 -9
  83. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  84. package/src/llama.cpp/ggml/include/ggml-backend.h +46 -33
  85. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  86. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  87. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  88. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  89. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  90. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  91. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  92. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  93. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  94. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  95. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  96. package/src/llama.cpp/ggml/include/ggml.h +53 -393
  97. package/src/llama.cpp/ggml/src/CMakeLists.txt +66 -1149
  98. package/src/llama.cpp/ggml/src/ggml-aarch64.c +46 -3126
  99. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  100. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -27
  101. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  102. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  103. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  104. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  105. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  106. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +6 -25
  107. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  108. package/src/llama.cpp/ggml/src/ggml-backend.cpp +303 -864
  109. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  110. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +213 -65
  111. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  112. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +255 -149
  113. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  114. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  115. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  116. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -243
  117. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  118. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  120. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  121. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +667 -1
  122. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  123. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  124. package/src/llama.cpp/ggml/src/ggml-impl.h +366 -16
  125. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  126. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +238 -72
  127. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  128. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  129. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  130. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  131. package/src/llama.cpp/ggml/src/ggml-quants.c +187 -10692
  132. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  133. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  134. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +475 -300
  135. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  136. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  137. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +40 -0
  138. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +258 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2 -22
  141. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  142. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  143. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3584 -4142
  144. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +69 -67
  145. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +3 -3
  146. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  148. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  149. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  150. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  151. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  152. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  153. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  154. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  155. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +555 -623
  156. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +125 -206
  157. package/src/llama.cpp/ggml/src/ggml.c +4032 -19890
  158. package/src/llama.cpp/include/llama.h +67 -33
  159. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  160. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  161. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  162. package/src/llama.cpp/src/llama-sampling.cpp +745 -105
  163. package/src/llama.cpp/src/llama-sampling.h +21 -2
  164. package/src/llama.cpp/src/llama-vocab.cpp +49 -9
  165. package/src/llama.cpp/src/llama-vocab.h +35 -11
  166. package/src/llama.cpp/src/llama.cpp +2636 -2406
  167. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  168. package/src/llama.cpp/tests/CMakeLists.txt +1 -2
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +14 -14
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +185 -60
  171. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  172. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  173. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  174. package/src/llama.cpp/tests/test-log.cpp +2 -2
  175. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  176. package/src/llama.cpp/tests/test-quantize-fns.cpp +22 -19
  177. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  178. package/src/llama.cpp/tests/test-rope.cpp +1 -0
  179. package/src/llama.cpp/tests/test-sampling.cpp +162 -137
  180. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  181. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  182. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  183. package/src/llama.cpp/common/train.cpp +0 -1515
  184. package/src/llama.cpp/common/train.h +0 -233
  185. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  186. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  187. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  188. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  189. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
  190. /package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +0 -0
@@ -0,0 +1,17 @@
1
+ #pragma once
2
+ #include "common.h"
3
+ #include <stdint.h>
4
+
5
+ #ifdef __cplusplus
6
+ extern "C" {
7
+ #endif
8
+
9
+ size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);
10
+
11
+ void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
12
+
13
+ void ggml_backend_amx_mul_mat(ggml_backend_amx_context * ctx, struct ggml_tensor * dst);
14
+
15
+ #ifdef __cplusplus
16
+ }
17
+ #endif
@@ -22,7 +22,7 @@ extern "C" {
22
22
  size_t (*get_max_size) (ggml_backend_buffer_type_t buft);
23
23
  // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
24
24
  size_t (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
25
- // (optional) check if tensor data is in host memory (defaults to false)
25
+ // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
26
26
  bool (*is_host) (ggml_backend_buffer_type_t buft);
27
27
  };
28
28
 
@@ -37,7 +37,6 @@ extern "C" {
37
37
  //
38
38
 
39
39
  struct ggml_backend_buffer_i {
40
- const char * (*get_name) (ggml_backend_buffer_t buffer);
41
40
  // (optional) free the buffer
42
41
  void (*free_buffer) (ggml_backend_buffer_t buffer);
43
42
  // base address of the buffer
@@ -88,18 +87,16 @@ extern "C" {
88
87
 
89
88
  void (*free)(ggml_backend_t backend);
90
89
 
91
- // buffer allocation
92
- ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
93
-
94
90
  // (optional) asynchronous tensor data access
95
91
  void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
96
92
  void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
97
93
  bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
98
94
 
99
- // (optional) complete all pending operations
95
+ // (optional) complete all pending operations (required if the backend supports async operations)
100
96
  void (*synchronize)(ggml_backend_t backend);
101
97
 
102
- // (optional) compute graph with a plan (not used currently)
98
+ // (optional) graph plans (not used currently)
99
+ // compute graph with a plan
103
100
  ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
104
101
  void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
105
102
  // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
@@ -110,21 +107,6 @@ extern "C" {
110
107
  // compute graph (always async if supported by the backend)
111
108
  enum ggml_status (*graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
112
109
 
113
- // IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
114
- // new backends should implement the device interface instead
115
-
116
- // These functions are being moved to the device interface
117
- // check if the backend can compute an operation
118
- bool (*supports_op) (ggml_backend_t backend, const struct ggml_tensor * op);
119
-
120
- // check if the backend can use tensors allocated in a buffer type
121
- bool (*supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
122
-
123
- // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
124
- // these should be expensive operations with large batch sizes that may benefit from running on this backend
125
- // even if the weight has to be copied from the CPU temporarily
126
- bool (*offload_op) (ggml_backend_t backend, const struct ggml_tensor * op);
127
-
128
110
  // (optional) event synchronization
129
111
  // record an event on this stream
130
112
  void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
@@ -184,9 +166,8 @@ extern "C" {
184
166
  // check if the backend can use tensors allocated in a buffer type
185
167
  bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);
186
168
 
187
- // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
188
- // these should be expensive operations with large batch sizes that may benefit from running on this backend
189
- // even if the weight has to be copied from the CPU temporarily
169
+ // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
170
+ // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
190
171
  bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
191
172
 
192
173
  // (optional) event synchronization
@@ -0,0 +1,195 @@
1
+ #include "ggml-backend-impl.h"
2
+ #include "ggml-backend.h"
3
+ #include "ggml-cpu.h"
4
+ #include "ggml-impl.h"
5
+ #include <cstring>
6
+ #include <vector>
7
+
8
+ // Backend registry
9
+
10
+ #ifdef GGML_USE_CUDA
11
+ #include "ggml-cuda.h"
12
+ #endif
13
+
14
+ #ifdef GGML_USE_METAL
15
+ #include "ggml-metal.h"
16
+ #endif
17
+
18
+ #ifdef GGML_USE_SYCL
19
+ #include "ggml-sycl.h"
20
+ #endif
21
+
22
+ #ifdef GGML_USE_VULKAN
23
+ #include "ggml-vulkan.h"
24
+ #endif
25
+
26
+ #ifdef GGML_USE_BLAS
27
+ #include "ggml-blas.h"
28
+ #endif
29
+
30
+ #ifdef GGML_USE_RPC
31
+ #include "ggml-rpc.h"
32
+ #endif
33
+
34
+ #ifdef GGML_USE_AMX
35
+ # include "ggml-amx.h"
36
+ #endif
37
+
38
+ #ifdef GGML_USE_CANN
39
+ #include "ggml-cann.h"
40
+ #endif
41
+
42
+ #ifdef GGML_USE_KOMPUTE
43
+ #include "ggml-kompute.h"
44
+ #endif
45
+
46
+ struct ggml_backend_registry {
47
+ std::vector<ggml_backend_reg_t> backends;
48
+ std::vector<ggml_backend_dev_t> devices;
49
+
50
+ ggml_backend_registry() {
51
+ #ifdef GGML_USE_CUDA
52
+ register_backend(ggml_backend_cuda_reg());
53
+ #endif
54
+ #ifdef GGML_USE_METAL
55
+ register_backend(ggml_backend_metal_reg());
56
+ #endif
57
+ #ifdef GGML_USE_SYCL
58
+ register_backend(ggml_backend_sycl_reg());
59
+ #endif
60
+ #ifdef GGML_USE_VULKAN
61
+ register_backend(ggml_backend_vk_reg());
62
+ #endif
63
+ #ifdef GGML_USE_CANN
64
+ register_backend(ggml_backend_cann_reg());
65
+ #endif
66
+ #ifdef GGML_USE_BLAS
67
+ register_backend(ggml_backend_blas_reg());
68
+ #endif
69
+ #ifdef GGML_USE_RPC
70
+ register_backend(ggml_backend_rpc_reg());
71
+ #endif
72
+ #ifdef GGML_USE_AMX
73
+ register_backend(ggml_backend_amx_reg());
74
+ #endif
75
+ #ifdef GGML_USE_KOMPUTE
76
+ register_backend(ggml_backend_kompute_reg());
77
+ #endif
78
+
79
+ register_backend(ggml_backend_cpu_reg());
80
+ }
81
+
82
+ void register_backend(ggml_backend_reg_t reg) {
83
+ if (!reg) {
84
+ return;
85
+ }
86
+
87
+ #ifndef NDEBUG
88
+ GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
89
+ __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
90
+ #endif
91
+ backends.push_back(reg);
92
+ for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
93
+ register_device(ggml_backend_reg_dev_get(reg, i));
94
+ }
95
+ }
96
+
97
+ void register_device(ggml_backend_dev_t device) {
98
+ #ifndef NDEBUG
99
+ GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
100
+ #endif
101
+ devices.push_back(device);
102
+ }
103
+ };
104
+
105
+ static ggml_backend_registry & get_reg() {
106
+ static ggml_backend_registry reg;
107
+ return reg;
108
+ }
109
+
110
+ // Internal API
111
+ void ggml_backend_register(ggml_backend_reg_t reg) {
112
+ get_reg().register_backend(reg);
113
+ }
114
+
115
+ void ggml_backend_device_register(ggml_backend_dev_t device) {
116
+ get_reg().register_device(device);
117
+ }
118
+
119
+ // Backend (reg) enumeration
120
+ size_t ggml_backend_reg_count() {
121
+ return get_reg().backends.size();
122
+ }
123
+
124
+ ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
125
+ GGML_ASSERT(index < ggml_backend_reg_count());
126
+ return get_reg().backends[index];
127
+ }
128
+
129
+ ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
130
+ for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
131
+ ggml_backend_reg_t reg = ggml_backend_reg_get(i);
132
+ if (std::strcmp(ggml_backend_reg_name(reg), name) == 0) {
133
+ return reg;
134
+ }
135
+ }
136
+ return NULL;
137
+ }
138
+
139
+ // Device enumeration
140
+ size_t ggml_backend_dev_count() {
141
+ return get_reg().devices.size();
142
+ }
143
+
144
+ ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
145
+ GGML_ASSERT(index < ggml_backend_dev_count());
146
+ return get_reg().devices[index];
147
+ }
148
+
149
+ ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
150
+ for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
151
+ ggml_backend_dev_t dev = ggml_backend_dev_get(i);
152
+ if (strcmp(ggml_backend_dev_name(dev), name) == 0) {
153
+ return dev;
154
+ }
155
+ }
156
+ return NULL;
157
+ }
158
+
159
+ ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
160
+ for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
161
+ ggml_backend_dev_t dev = ggml_backend_dev_get(i);
162
+ if (ggml_backend_dev_type(dev) == type) {
163
+ return dev;
164
+ }
165
+ }
166
+ return NULL;
167
+ }
168
+
169
+ // Convenience functions
170
+ ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
171
+ ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
172
+ if (!dev) {
173
+ return NULL;
174
+ }
175
+ return ggml_backend_dev_init(dev, params);
176
+ }
177
+
178
+ ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
179
+ ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
180
+ if (!dev) {
181
+ return NULL;
182
+ }
183
+ return ggml_backend_dev_init(dev, params);
184
+ }
185
+
186
+ ggml_backend_t ggml_backend_init_best(void) {
187
+ ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
188
+ if (!dev) {
189
+ dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
190
+ }
191
+ if (!dev) {
192
+ return NULL;
193
+ }
194
+ return ggml_backend_dev_init(dev, NULL);
195
+ }