@fugood/llama.node 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. package/CMakeLists.txt +5 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/EmbeddingWorker.cpp +15 -5
  19. package/src/EmbeddingWorker.h +2 -1
  20. package/src/LlamaCompletionWorker.cpp +1 -1
  21. package/src/LlamaContext.cpp +81 -18
  22. package/src/LlamaContext.h +2 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +197 -159
  24. package/src/llama.cpp/.github/workflows/docker.yml +5 -8
  25. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  26. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  27. package/src/llama.cpp/CMakeLists.txt +11 -6
  28. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  29. package/src/llama.cpp/cmake/common.cmake +33 -0
  30. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  31. package/src/llama.cpp/common/CMakeLists.txt +6 -2
  32. package/src/llama.cpp/common/arg.cpp +426 -245
  33. package/src/llama.cpp/common/common.cpp +143 -80
  34. package/src/llama.cpp/common/common.h +81 -24
  35. package/src/llama.cpp/common/sampling.cpp +53 -19
  36. package/src/llama.cpp/common/sampling.h +22 -1
  37. package/src/llama.cpp/common/speculative.cpp +274 -0
  38. package/src/llama.cpp/common/speculative.h +28 -0
  39. package/src/llama.cpp/docs/build.md +101 -148
  40. package/src/llama.cpp/examples/CMakeLists.txt +32 -13
  41. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +5 -4
  43. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  46. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  47. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  48. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  49. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  50. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  51. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  52. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  55. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  57. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  58. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  59. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  60. package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
  61. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/infill/infill.cpp +1 -1
  63. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  64. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
  65. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  66. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  67. package/src/llama.cpp/examples/llava/clip.cpp +262 -66
  68. package/src/llama.cpp/examples/llava/clip.h +8 -2
  69. package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
  70. package/src/llama.cpp/examples/llava/llava.cpp +46 -19
  71. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
  72. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  73. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
  75. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  76. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
  77. package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
  78. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/main/main.cpp +9 -5
  80. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  81. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
  83. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  84. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  87. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  88. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
  90. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  91. package/src/llama.cpp/examples/run/run.cpp +911 -0
  92. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
  94. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
  95. package/src/llama.cpp/examples/server/server.cpp +1758 -886
  96. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  97. package/src/llama.cpp/examples/server/utils.hpp +94 -304
  98. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  99. package/src/llama.cpp/examples/simple/simple.cpp +4 -0
  100. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
  101. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
  102. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
  104. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  106. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
  108. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  109. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  110. package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
  111. package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
  112. package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
  113. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  114. package/src/llama.cpp/ggml/include/ggml.h +106 -24
  115. package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
  123. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  124. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  125. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
  126. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  127. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  128. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  129. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  130. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  131. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  132. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  133. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  134. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  135. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
  136. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  137. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  138. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
  139. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
  140. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
  141. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  142. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
  143. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
  151. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
  152. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
  153. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  155. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
  156. package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
  157. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
  158. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
  159. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
  160. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
  161. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
  162. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  163. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  164. package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
  165. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
  167. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
  169. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
  172. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  173. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  174. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
  175. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
  176. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  177. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
  178. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
  182. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
  183. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  184. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  185. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
  187. package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
  188. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
  189. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
  190. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
  191. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
  192. package/src/llama.cpp/ggml/src/ggml.c +367 -207
  193. package/src/llama.cpp/include/llama-cpp.h +25 -0
  194. package/src/llama.cpp/include/llama.h +26 -19
  195. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  196. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  197. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  198. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  199. package/src/llama.cpp/src/CMakeLists.txt +2 -7
  200. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  201. package/src/llama.cpp/src/llama-grammar.h +2 -5
  202. package/src/llama.cpp/src/llama-sampling.cpp +35 -90
  203. package/src/llama.cpp/src/llama-vocab.cpp +6 -1
  204. package/src/llama.cpp/src/llama.cpp +1748 -640
  205. package/src/llama.cpp/src/unicode.cpp +62 -51
  206. package/src/llama.cpp/src/unicode.h +9 -10
  207. package/src/llama.cpp/tests/CMakeLists.txt +48 -37
  208. package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
  209. package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
  210. package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
  211. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  212. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  213. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  214. package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
  215. package/src/llama.cpp/tests/test-rope.cpp +61 -20
  216. package/src/llama.cpp/tests/test-sampling.cpp +2 -2
  217. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  218. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  219. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  220. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  221. package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
  222. package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
  223. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
  224. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
  225. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
File: package/src/llama.cpp/ggml/src/ggml-alloc.c
@@ -534,7 +534,6 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
534
534
  size_t offset = ggml_dyn_tallocr_alloc(alloc, size, node);
535
535
  hn->buffer_id = buffer_id;
536
536
  hn->offset = offset;
537
- return;
538
537
  }
539
538
  }
540
539
 
File: package/src/llama.cpp/ggml/src/ggml-backend-impl.h
@@ -8,6 +8,8 @@
8
8
  extern "C" {
9
9
  #endif
10
10
 
11
+ #define GGML_BACKEND_API_VERSION 1
12
+
11
13
  //
12
14
  // Backend buffer type
13
15
  //
@@ -63,20 +65,20 @@ extern "C" {
63
65
  enum ggml_backend_buffer_usage usage;
64
66
  };
65
67
 
66
- ggml_backend_buffer_t ggml_backend_buffer_init(
68
+ GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
67
69
  ggml_backend_buffer_type_t buft,
68
70
  struct ggml_backend_buffer_i iface,
69
71
  void * context,
70
72
  size_t size);
71
73
 
72
74
  // do not use directly, use ggml_backend_tensor_copy instead
73
- bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
75
+ GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
74
76
 
75
77
  // multi-buffer
76
78
  // buffer that contains a collection of buffers
77
- ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
78
- bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
79
- void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
79
+ GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
80
+ GGML_API bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
81
+ GGML_API void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
80
82
 
81
83
  //
82
84
  // Backend (stream)
@@ -199,17 +201,55 @@ extern "C" {
199
201
  };
200
202
 
201
203
  struct ggml_backend_reg {
202
- // int api_version; // TODO: for dynamic loading
204
+ int api_version; // initialize to GGML_BACKEND_API_VERSION
203
205
  struct ggml_backend_reg_i iface;
204
206
  void * context;
205
207
  };
206
208
 
207
-
208
209
  // Internal backend registry API
209
- void ggml_backend_register(ggml_backend_reg_t reg);
210
- void ggml_backend_device_register(ggml_backend_dev_t device);
211
- // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function
212
- // typedef ggml_backend_register_t * (*ggml_backend_init)(void);
210
+ GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
211
+ GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
212
+
213
+ // Add backend dynamic loading support to the backend
214
+
215
+ // Initialize the backend
216
+ typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
217
+ // Optional: obtain a score for the backend based on the system configuration
218
+ // Higher scores are preferred, 0 means the backend is not supported in the current system
219
+ typedef int (*ggml_backend_score_t)(void);
220
+
221
+ #ifdef GGML_BACKEND_DL
222
+ # ifdef __cplusplus
223
+ # define GGML_BACKEND_DL_IMPL(reg_fn) \
224
+ extern "C" { \
225
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
226
+ } \
227
+ ggml_backend_reg_t ggml_backend_init(void) { \
228
+ return reg_fn(); \
229
+ }
230
+ # define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
231
+ extern "C" { \
232
+ GGML_BACKEND_API int ggml_backend_score(void); \
233
+ } \
234
+ int ggml_backend_score(void) { \
235
+ return score_fn(); \
236
+ }
237
+ # else
238
+ # define GGML_BACKEND_DL_IMPL(reg_fn) \
239
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
240
+ ggml_backend_reg_t ggml_backend_init(void) { \
241
+ return reg_fn(); \
242
+ }
243
+ # define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
244
+ GGML_BACKEND_API int ggml_backend_score(void); \
245
+ int ggml_backend_score(void) { \
246
+ return score_fn(); \
247
+ }
248
+ # endif
249
+ #else
250
+ # define GGML_BACKEND_DL_IMPL(reg_fn)
251
+ # define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
252
+ #endif
213
253
 
214
254
  #ifdef __cplusplus
215
255
  }
File: package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp
@@ -1,11 +1,34 @@
1
1
  #include "ggml-backend-impl.h"
2
2
  #include "ggml-backend.h"
3
- #include "ggml-cpu.h"
4
3
  #include "ggml-impl.h"
4
+ #include <algorithm>
5
+ #include <codecvt>
5
6
  #include <cstring>
7
+ #include <filesystem>
8
+ #include <locale>
9
+ #include <memory>
10
+ #include <string>
11
+ #include <type_traits>
6
12
  #include <vector>
7
13
 
14
+ #ifdef _WIN32
15
+ # define WIN32_LEAN_AND_MEAN
16
+ # ifndef NOMINMAX
17
+ # define NOMINMAX
18
+ # endif
19
+ # include <windows.h>
20
+ #elif defined(__APPLE__)
21
+ # include <mach-o/dyld.h>
22
+ # include <dlfcn.h>
23
+ #else
24
+ # include <dlfcn.h>
25
+ # include <unistd.h>
26
+ #endif
27
+
8
28
  // Backend registry
29
+ #ifdef GGML_USE_CPU
30
+ #include "ggml-cpu.h"
31
+ #endif
9
32
 
10
33
  #ifdef GGML_USE_CUDA
11
34
  #include "ggml-cuda.h"
@@ -23,6 +46,10 @@
23
46
  #include "ggml-vulkan.h"
24
47
  #endif
25
48
 
49
+ #ifdef GGML_USE_OPENCL
50
+ #include "ggml-opencl.h"
51
+ #endif
52
+
26
53
  #ifdef GGML_USE_BLAS
27
54
  #include "ggml-blas.h"
28
55
  #endif
@@ -31,10 +58,6 @@
31
58
  #include "ggml-rpc.h"
32
59
  #endif
33
60
 
34
- #ifdef GGML_USE_AMX
35
- # include "ggml-amx.h"
36
- #endif
37
-
38
61
  #ifdef GGML_USE_CANN
39
62
  #include "ggml-cann.h"
40
63
  #endif
@@ -43,8 +66,75 @@
43
66
  #include "ggml-kompute.h"
44
67
  #endif
45
68
 
69
+ #ifdef _WIN32
70
+
71
+ using dl_handle = std::remove_pointer_t<HMODULE>;
72
+
73
+ struct dl_handle_deleter {
74
+ void operator()(HMODULE handle) {
75
+ FreeLibrary(handle);
76
+ }
77
+ };
78
+
79
+ static dl_handle * dl_load_library(const std::wstring & path) {
80
+ // suppress error dialogs for missing DLLs
81
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
82
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
83
+
84
+ HMODULE handle = LoadLibraryW(path.c_str());
85
+
86
+ SetErrorMode(old_mode);
87
+
88
+ return handle;
89
+ }
90
+
91
+ static dl_handle * dl_load_library(const std::string & path) {
92
+ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
93
+ return dl_load_library(converter.from_bytes(path));
94
+ }
95
+
96
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
97
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
98
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
99
+
100
+ void * p = (void *) GetProcAddress(handle, name);
101
+
102
+ SetErrorMode(old_mode);
103
+
104
+ return p;
105
+ }
106
+
107
+ #else
108
+
109
+ using dl_handle = void;
110
+
111
+ struct dl_handle_deleter {
112
+ void operator()(void * handle) {
113
+ dlclose(handle);
114
+ }
115
+ };
116
+
117
+ static void * dl_load_library(const std::string & path) {
118
+ dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
119
+
120
+ return handle;
121
+ }
122
+
123
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
124
+ return dlsym(handle, name);
125
+ }
126
+
127
+ #endif
128
+
129
+ using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
130
+
131
+ struct ggml_backend_reg_entry {
132
+ ggml_backend_reg_t reg;
133
+ dl_handle_ptr handle;
134
+ };
135
+
46
136
  struct ggml_backend_registry {
47
- std::vector<ggml_backend_reg_t> backends;
137
+ std::vector<ggml_backend_reg_entry> backends;
48
138
  std::vector<ggml_backend_dev_t> devices;
49
139
 
50
140
  ggml_backend_registry() {
@@ -60,6 +150,9 @@ struct ggml_backend_registry {
60
150
  #ifdef GGML_USE_VULKAN
61
151
  register_backend(ggml_backend_vk_reg());
62
152
  #endif
153
+ #ifdef GGML_USE_OPENCL
154
+ register_backend(ggml_backend_opencl_reg());
155
+ #endif
63
156
  #ifdef GGML_USE_CANN
64
157
  register_backend(ggml_backend_cann_reg());
65
158
  #endif
@@ -69,17 +162,25 @@ struct ggml_backend_registry {
69
162
  #ifdef GGML_USE_RPC
70
163
  register_backend(ggml_backend_rpc_reg());
71
164
  #endif
72
- #ifdef GGML_USE_AMX
73
- register_backend(ggml_backend_amx_reg());
74
- #endif
75
165
  #ifdef GGML_USE_KOMPUTE
76
166
  register_backend(ggml_backend_kompute_reg());
77
167
  #endif
78
-
168
+ #ifdef GGML_USE_CPU
79
169
  register_backend(ggml_backend_cpu_reg());
170
+ #endif
171
+ }
172
+
173
+ ~ggml_backend_registry() {
174
+ // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
175
+ // since backend threads may still be running and accessing resources from the dynamic library
176
+ for (auto & entry : backends) {
177
+ if (entry.handle) {
178
+ entry.handle.release(); // NOLINT
179
+ }
180
+ }
80
181
  }
81
182
 
82
- void register_backend(ggml_backend_reg_t reg) {
183
+ void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
83
184
  if (!reg) {
84
185
  return;
85
186
  }
@@ -88,7 +189,7 @@ struct ggml_backend_registry {
88
189
  GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
89
190
  __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
90
191
  #endif
91
- backends.push_back(reg);
192
+ backends.push_back({ reg, std::move(handle) });
92
193
  for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
93
194
  register_device(ggml_backend_reg_dev_get(reg, i));
94
195
  }
@@ -100,6 +201,76 @@ struct ggml_backend_registry {
100
201
  #endif
101
202
  devices.push_back(device);
102
203
  }
204
+
205
+ ggml_backend_reg_t load_backend(const char * path, bool silent) {
206
+ dl_handle_ptr handle { dl_load_library(path) };
207
+ if (!handle) {
208
+ if (!silent) {
209
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
210
+ }
211
+ return nullptr;
212
+ }
213
+
214
+ auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
215
+ if (score_fn && score_fn() == 0) {
216
+ if (!silent) {
217
+ GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
218
+ }
219
+ return nullptr;
220
+ }
221
+
222
+ auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
223
+ if (!backend_init_fn) {
224
+ if (!silent) {
225
+ GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
226
+ }
227
+ return nullptr;
228
+ }
229
+
230
+ ggml_backend_reg_t reg = backend_init_fn();
231
+ if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
232
+ if (!silent) {
233
+ if (!reg) {
234
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
235
+ } else {
236
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
237
+ __func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
238
+ }
239
+ }
240
+ return nullptr;
241
+ }
242
+
243
+ GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
244
+
245
+ register_backend(reg, std::move(handle));
246
+
247
+ return reg;
248
+ }
249
+
250
+ void unload_backend(ggml_backend_reg_t reg, bool silent) {
251
+ auto it = std::find_if(backends.begin(), backends.end(),
252
+ [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
253
+
254
+ if (it == backends.end()) {
255
+ if (!silent) {
256
+ GGML_LOG_ERROR("%s: backend not found\n", __func__);
257
+ }
258
+ return;
259
+ }
260
+
261
+ if (!silent) {
262
+ GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
263
+ }
264
+
265
+ // remove devices
266
+ devices.erase(
267
+ std::remove_if(devices.begin(), devices.end(),
268
+ [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
269
+ devices.end());
270
+
271
+ // remove backend
272
+ backends.erase(it);
273
+ }
103
274
  };
104
275
 
105
276
  static ggml_backend_registry & get_reg() {
@@ -117,23 +288,32 @@ void ggml_backend_device_register(ggml_backend_dev_t device) {
117
288
  }
118
289
 
119
290
  // Backend (reg) enumeration
291
+ static bool striequals(const char * a, const char * b) {
292
+ for (; *a && *b; a++, b++) {
293
+ if (std::tolower(*a) != std::tolower(*b)) {
294
+ return false;
295
+ }
296
+ }
297
+ return *a == *b;
298
+ }
299
+
120
300
  size_t ggml_backend_reg_count() {
121
301
  return get_reg().backends.size();
122
302
  }
123
303
 
124
304
  ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
125
305
  GGML_ASSERT(index < ggml_backend_reg_count());
126
- return get_reg().backends[index];
306
+ return get_reg().backends[index].reg;
127
307
  }
128
308
 
129
309
  ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
130
310
  for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
131
311
  ggml_backend_reg_t reg = ggml_backend_reg_get(i);
132
- if (std::strcmp(ggml_backend_reg_name(reg), name) == 0) {
312
+ if (striequals(ggml_backend_reg_name(reg), name)) {
133
313
  return reg;
134
314
  }
135
315
  }
136
- return NULL;
316
+ return nullptr;
137
317
  }
138
318
 
139
319
  // Device enumeration
@@ -149,11 +329,11 @@ ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
149
329
  ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
150
330
  for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
151
331
  ggml_backend_dev_t dev = ggml_backend_dev_get(i);
152
- if (strcmp(ggml_backend_dev_name(dev), name) == 0) {
332
+ if (striequals(ggml_backend_dev_name(dev), name)) {
153
333
  return dev;
154
334
  }
155
335
  }
156
- return NULL;
336
+ return nullptr;
157
337
  }
158
338
 
159
339
  ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
@@ -163,14 +343,14 @@ ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
163
343
  return dev;
164
344
  }
165
345
  }
166
- return NULL;
346
+ return nullptr;
167
347
  }
168
348
 
169
349
  // Convenience functions
170
350
  ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
171
351
  ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
172
352
  if (!dev) {
173
- return NULL;
353
+ return nullptr;
174
354
  }
175
355
  return ggml_backend_dev_init(dev, params);
176
356
  }
@@ -178,7 +358,7 @@ ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params)
178
358
  ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
179
359
  ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
180
360
  if (!dev) {
181
- return NULL;
361
+ return nullptr;
182
362
  }
183
363
  return ggml_backend_dev_init(dev, params);
184
364
  }
@@ -189,7 +369,184 @@ ggml_backend_t ggml_backend_init_best(void) {
189
369
  dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
190
370
  }
191
371
  if (!dev) {
192
- return NULL;
372
+ return nullptr;
373
+ }
374
+ return ggml_backend_dev_init(dev, nullptr);
375
+ }
376
+
377
+ // Dynamic loading
378
+ ggml_backend_reg_t ggml_backend_load(const char * path) {
379
+ return get_reg().load_backend(path, false);
380
+ }
381
+
382
+ void ggml_backend_unload(ggml_backend_reg_t reg) {
383
+ get_reg().unload_backend(reg, true);
384
+ }
385
+
386
+ static std::string get_executable_path() {
387
+ #if defined(__APPLE__)
388
+ // get executable path
389
+ std::vector<char> path;
390
+ uint32_t size;
391
+ while (true) {
392
+ size = path.size();
393
+ if (_NSGetExecutablePath(path.data(), &size) == 0) {
394
+ break;
395
+ }
396
+ path.resize(size);
397
+ }
398
+ std::string base_path(path.data(), size);
399
+ // remove executable name
400
+ auto last_slash = base_path.find_last_of('/');
401
+ if (last_slash != std::string::npos) {
402
+ base_path = base_path.substr(0, last_slash);
403
+ }
404
+ return base_path + "/";
405
+ #elif defined(__linux__)
406
+ std::string base_path = ".";
407
+ std::vector<char> path(1024);
408
+ while (true) {
409
+ // get executable path
410
+ ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
411
+ if (len == -1) {
412
+ break;
413
+ }
414
+ if (len < (ssize_t) path.size()) {
415
+ base_path = std::string(path.data(), len);
416
+ // remove executable name
417
+ auto last_slash = base_path.find_last_of('/');
418
+ if (last_slash != std::string::npos) {
419
+ base_path = base_path.substr(0, last_slash);
420
+ }
421
+ break;
422
+ }
423
+ path.resize(path.size() * 2);
424
+ }
425
+
426
+ return base_path + "/";
427
+ #elif defined(_WIN32)
428
+ std::vector<char> path(MAX_PATH);
429
+ DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
430
+ if (len == 0) {
431
+ return "";
193
432
  }
194
- return ggml_backend_dev_init(dev, NULL);
433
+ std::string base_path(path.data(), len);
434
+ // remove executable name
435
+ auto last_slash = base_path.find_last_of('\\');
436
+ if (last_slash != std::string::npos) {
437
+ base_path = base_path.substr(0, last_slash);
438
+ }
439
+ return base_path + "\\";
440
+ #endif
441
+ }
442
+
443
+ static std::string backend_filename_prefix() {
444
+ #ifdef _WIN32
445
+ return "ggml-";
446
+ #else
447
+ return "libggml-";
448
+ #endif
449
+ }
450
+
451
+ static std::string backend_filename_suffix() {
452
+ #ifdef _WIN32
453
+ return ".dll";
454
+ #else
455
+ return ".so";
456
+ #endif
457
+ }
458
+
459
+ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
460
+ // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
461
+ // TODO: search system paths
462
+ std::string file_prefix = backend_filename_prefix() + name + "-";
463
+ std::vector<std::string> search_paths;
464
+ if (user_search_path == nullptr) {
465
+ search_paths.push_back("./");
466
+ search_paths.push_back(get_executable_path());
467
+ } else {
468
+ #if defined(_WIN32)
469
+ search_paths.push_back(std::string(user_search_path) + "\\");
470
+ #else
471
+ search_paths.push_back(std::string(user_search_path) + "/");
472
+ #endif
473
+ }
474
+
475
+ int best_score = 0;
476
+ std::string best_path;
477
+
478
+ namespace fs = std::filesystem;
479
+ for (const auto & search_path : search_paths) {
480
+ if (!fs::exists(search_path)) {
481
+ continue;
482
+ }
483
+ fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
484
+ for (const auto & entry : dir_it) {
485
+ if (entry.is_regular_file()) {
486
+ std::string filename = entry.path().filename().string();
487
+ std::string ext = entry.path().extension().string();
488
+ if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
489
+ dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
490
+ if (!handle && !silent) {
491
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
492
+ }
493
+ if (handle) {
494
+ auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
495
+ if (score_fn) {
496
+ int s = score_fn();
497
+ #ifndef NDEBUG
498
+ GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
499
+ #endif
500
+ if (s > best_score) {
501
+ best_score = s;
502
+ best_path = entry.path().string();
503
+ }
504
+ } else {
505
+ if (!silent) {
506
+ GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
507
+ }
508
+ }
509
+ }
510
+ }
511
+ }
512
+ }
513
+ }
514
+
515
+ if (best_score == 0) {
516
+ // try to load the base backend
517
+ for (const auto & search_path : search_paths) {
518
+ std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
519
+ if (fs::exists(path)) {
520
+ return get_reg().load_backend(path.c_str(), silent);
521
+ }
522
+ }
523
+ return nullptr;
524
+ }
525
+
526
+ return get_reg().load_backend(best_path.c_str(), silent);
527
+ }
528
+
529
+ void ggml_backend_load_all() {
530
+ ggml_backend_load_all_from_path(nullptr);
531
+ }
532
+
533
+ void ggml_backend_load_all_from_path(const char * dir_path) {
534
+ #ifdef NDEBUG
535
+ bool silent = true;
536
+ #else
537
+ bool silent = false;
538
+ #endif
539
+
540
+ ggml_backend_load_best("blas", silent, dir_path);
541
+ ggml_backend_load_best("cann", silent, dir_path);
542
+ ggml_backend_load_best("cuda", silent, dir_path);
543
+ ggml_backend_load_best("hip", silent, dir_path);
544
+ ggml_backend_load_best("kompute", silent, dir_path);
545
+ ggml_backend_load_best("metal", silent, dir_path);
546
+ ggml_backend_load_best("rpc", silent, dir_path);
547
+ ggml_backend_load_best("sycl", silent, dir_path);
548
+ ggml_backend_load_best("vulkan", silent, dir_path);
549
+ ggml_backend_load_best("opencl", silent, dir_path);
550
+ ggml_backend_load_best("musa", silent, dir_path);
551
+ ggml_backend_load_best("cpu", silent, dir_path);
195
552
  }
@@ -252,6 +252,7 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten
252
252
  }
253
253
 
254
254
  void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
255
+ GGML_ASSERT(tensor);
255
256
  ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
256
257
 
257
258
  if (size == 0) {
@@ -266,6 +267,7 @@ void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, siz
266
267
  }
267
268
 
268
269
  void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
270
+ GGML_ASSERT(tensor);
269
271
  ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
270
272
 
271
273
  if (size == 0) {
@@ -740,7 +742,8 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
740
742
 
741
743
  if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) {
742
744
  // since the tensor is pre-allocated, it cannot be moved to another backend
743
- GGML_ABORT("pre-allocated tensor (%s) in a backend that cannot run the operation", tensor->name);
745
+ ggml_backend_buffer_t buffer = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
746
+ GGML_ABORT("pre-allocated tensor (%s) in a buffer (%s) that cannot run the operation (%s)", tensor->name, ggml_backend_buffer_name(buffer), ggml_op_name(tensor->op));
744
747
  }
745
748
 
746
749
  // graph input
@@ -884,9 +887,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
884
887
  for (int i = 0; i < graph->n_nodes; i++) {
885
888
  struct ggml_tensor * node = graph->nodes[i];
886
889
  int * node_backend_id = &tensor_backend_id(node);
887
- if (ggml_is_view_op(node->op)) {
888
- continue;
889
- }
890
890
  // do not overwrite user assignments
891
891
  if (*node_backend_id == -1) {
892
892
  *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
@@ -11,12 +11,9 @@ find_package(BLAS)
11
11
  if (BLAS_FOUND)
12
12
  message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
13
13
 
14
- add_library(ggml-blas
15
- ggml-blas.cpp
16
- )
17
-
18
- target_link_libraries(ggml-blas PRIVATE ggml-base)
19
- target_include_directories(ggml-blas PRIVATE . ..)
14
+ ggml_add_backend_library(ggml-blas
15
+ ggml-blas.cpp
16
+ )
20
17
 
21
18
  if (${GGML_BLAS_VENDOR} MATCHES "Apple")
22
19
  add_compile_definitions(ACCELERATE_NEW_LAPACK)
@@ -75,7 +72,6 @@ if (BLAS_FOUND)
75
72
 
76
73
  message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
77
74
 
78
- #add_compile_options(${BLAS_LINKER_FLAGS})
79
75
  target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})
80
76
 
81
77
  if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
@@ -506,9 +506,12 @@ static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
506
506
 
507
507
  ggml_backend_reg_t ggml_backend_blas_reg(void) {
508
508
  static struct ggml_backend_reg ggml_backend_blas_reg = {
509
- /* .iface = */ ggml_backend_blas_reg_i,
510
- /* .context = */ NULL,
509
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
510
+ /* .iface = */ ggml_backend_blas_reg_i,
511
+ /* .context = */ NULL,
511
512
  };
512
513
 
513
514
  return &ggml_backend_blas_reg;
514
515
  }
516
+
517
+ GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)