cui-llama.rn 1.4.4 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. package/android/src/main/CMakeLists.txt +9 -2
  2. package/android/src/main/jni.cpp +54 -34
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/binary-ops.cpp +158 -0
  12. package/cpp/binary-ops.h +16 -0
  13. package/cpp/chat.cpp +1769 -1085
  14. package/cpp/chat.h +143 -0
  15. package/cpp/common.cpp +1562 -1996
  16. package/cpp/common.h +677 -744
  17. package/cpp/cpu-common.h +72 -0
  18. package/cpp/ggml-alloc.c +1039 -1030
  19. package/cpp/ggml-alloc.h +1 -1
  20. package/cpp/ggml-backend-impl.h +255 -255
  21. package/cpp/ggml-backend-reg.cpp +586 -582
  22. package/cpp/ggml-backend.cpp +2004 -2002
  23. package/cpp/ggml-backend.h +354 -354
  24. package/cpp/ggml-common.h +1857 -1851
  25. package/cpp/ggml-cpp.h +39 -39
  26. package/cpp/ggml-cpu-aarch64.cpp +5725 -4247
  27. package/cpp/ggml-cpu-aarch64.h +8 -8
  28. package/cpp/ggml-cpu-impl.h +512 -380
  29. package/cpp/ggml-cpu-quants.c +13026 -11517
  30. package/cpp/ggml-cpu-traits.cpp +36 -36
  31. package/cpp/ggml-cpu-traits.h +38 -38
  32. package/cpp/ggml-cpu.c +3438 -14485
  33. package/cpp/ggml-cpu.cpp +655 -633
  34. package/cpp/ggml-cpu.h +138 -135
  35. package/cpp/ggml-impl.h +594 -567
  36. package/cpp/ggml-metal-impl.h +312 -3
  37. package/cpp/ggml-metal.h +66 -66
  38. package/cpp/ggml-metal.m +5360 -5002
  39. package/cpp/ggml-opt.cpp +854 -854
  40. package/cpp/ggml-opt.h +216 -216
  41. package/cpp/ggml-quants.c +5238 -5238
  42. package/cpp/ggml-threading.h +14 -14
  43. package/cpp/ggml.c +6618 -6524
  44. package/cpp/ggml.h +2222 -2194
  45. package/cpp/gguf.cpp +1330 -1329
  46. package/cpp/gguf.h +202 -202
  47. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  48. package/cpp/json-schema-to-grammar.h +21 -22
  49. package/cpp/json.hpp +24766 -24766
  50. package/cpp/llama-adapter.cpp +382 -347
  51. package/cpp/llama-adapter.h +76 -74
  52. package/cpp/llama-arch.cpp +1714 -1492
  53. package/cpp/llama-arch.h +428 -402
  54. package/cpp/llama-batch.cpp +368 -368
  55. package/cpp/llama-batch.h +88 -88
  56. package/cpp/llama-chat.cpp +640 -587
  57. package/cpp/llama-chat.h +56 -53
  58. package/cpp/llama-context.cpp +2831 -1775
  59. package/cpp/llama-context.h +265 -128
  60. package/cpp/llama-cparams.cpp +1 -1
  61. package/cpp/llama-cparams.h +38 -37
  62. package/cpp/llama-cpp.h +30 -30
  63. package/cpp/llama-grammar.cpp +1219 -1219
  64. package/cpp/llama-grammar.h +173 -164
  65. package/cpp/llama-graph.cpp +1695 -0
  66. package/cpp/llama-graph.h +592 -0
  67. package/cpp/llama-hparams.cpp +79 -71
  68. package/cpp/llama-hparams.h +156 -139
  69. package/cpp/llama-impl.cpp +167 -167
  70. package/cpp/llama-impl.h +61 -61
  71. package/cpp/llama-io.cpp +15 -0
  72. package/cpp/llama-io.h +35 -0
  73. package/cpp/llama-kv-cache.cpp +1380 -718
  74. package/cpp/llama-kv-cache.h +213 -218
  75. package/cpp/llama-memory.cpp +1 -0
  76. package/cpp/llama-memory.h +21 -0
  77. package/cpp/llama-mmap.cpp +600 -590
  78. package/cpp/llama-mmap.h +68 -68
  79. package/cpp/llama-model-loader.cpp +1129 -1124
  80. package/cpp/llama-model-loader.h +169 -167
  81. package/cpp/llama-model.cpp +13080 -4023
  82. package/cpp/llama-model.h +409 -370
  83. package/cpp/llama-sampling.cpp +2563 -2525
  84. package/cpp/llama-sampling.h +32 -32
  85. package/cpp/llama-vocab.cpp +3295 -3252
  86. package/cpp/llama-vocab.h +125 -125
  87. package/cpp/llama.cpp +351 -10137
  88. package/cpp/llama.h +1434 -1340
  89. package/cpp/log.cpp +427 -423
  90. package/cpp/log.h +132 -132
  91. package/cpp/{chat-template.hpp → minja/chat-template.hpp} +537 -529
  92. package/cpp/{minja.hpp → minja/minja.hpp} +2941 -2883
  93. package/cpp/ops.cpp +8723 -0
  94. package/cpp/ops.h +128 -0
  95. package/cpp/rn-llama.cpp +45 -71
  96. package/cpp/rn-llama.h +3 -3
  97. package/cpp/sampling.cpp +573 -532
  98. package/cpp/sgemm.cpp +3043 -2598
  99. package/cpp/sgemm.h +14 -14
  100. package/cpp/simd-mappings.h +888 -0
  101. package/cpp/speculative.cpp +278 -277
  102. package/cpp/speculative.h +28 -28
  103. package/cpp/unary-ops.cpp +186 -0
  104. package/cpp/unary-ops.h +28 -0
  105. package/cpp/vec.cpp +258 -0
  106. package/cpp/vec.h +802 -0
  107. package/ios/CMakeLists.txt +5 -2
  108. package/ios/RNLlama.mm +2 -2
  109. package/ios/RNLlamaContext.mm +40 -24
  110. package/package.json +1 -1
  111. package/src/NativeRNLlama.ts +6 -4
  112. package/src/index.ts +3 -1
  113. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  114. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  115. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  116. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  117. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  118. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  119. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  120. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  122. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  124. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  125. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  126. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  127. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  128. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  129. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  130. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  131. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  132. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  133. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  134. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  135. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  136. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  194. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  195. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  196. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  197. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  198. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  199. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  200. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  201. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  202. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  203. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  204. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  205. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  206. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  207. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  208. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  209. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  210. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  211. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  212. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  213. package/android/src/main/build-arm64/Makefile +0 -1862
  214. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  215. package/cpp/chat.hpp +0 -55
  216. package/cpp/rn-llama.hpp +0 -913
package/cpp/ggml-backend-reg.cpp
@@ -1,582 +1,586 @@
1
- #include "ggml-backend-impl.h"
2
- #include "ggml-backend.h"
3
- #include "ggml-impl.h"
4
- #include <algorithm>
5
- #include <codecvt>
6
- #include <cstring>
7
- #include <filesystem>
8
- #include <locale>
9
- #include <memory>
10
- #include <string>
11
- #include <type_traits>
12
- #include <vector>
13
-
14
- #ifdef _WIN32
15
- # define WIN32_LEAN_AND_MEAN
16
- # ifndef NOMINMAX
17
- # define NOMINMAX
18
- # endif
19
- # include <windows.h>
20
- #elif defined(__APPLE__)
21
- # include <mach-o/dyld.h>
22
- # include <dlfcn.h>
23
- #else
24
- # include <dlfcn.h>
25
- # include <unistd.h>
26
- #endif
27
-
28
- // Backend registry
29
- #ifdef LM_GGML_USE_CPU
30
- #include "ggml-cpu.h"
31
- #endif
32
-
33
- #ifdef LM_GGML_USE_CUDA
34
- #include "ggml-cuda.h"
35
- #endif
36
-
37
- #ifdef LM_GGML_USE_METAL
38
- #include "ggml-metal.h"
39
- #endif
40
-
41
- #ifdef LM_GGML_USE_SYCL
42
- #include "ggml-sycl.h"
43
- #endif
44
-
45
- #ifdef LM_GGML_USE_VULKAN
46
- #include "ggml-vulkan.h"
47
- #endif
48
-
49
- #ifdef LM_GGML_USE_OPENCL
50
- #include "ggml-opencl.h"
51
- #endif
52
-
53
- #ifdef LM_GGML_USE_BLAS
54
- #include "ggml-blas.h"
55
- #endif
56
-
57
- #ifdef LM_GGML_USE_RPC
58
- #include "ggml-rpc.h"
59
- #endif
60
-
61
- #ifdef LM_GGML_USE_CANN
62
- #include "ggml-cann.h"
63
- #endif
64
-
65
- #ifdef LM_GGML_USE_KOMPUTE
66
- #include "ggml-kompute.h"
67
- #endif
68
-
69
- // disable C++17 deprecation warning for std::codecvt_utf8
70
- #if defined(__clang__)
71
- # pragma clang diagnostic push
72
- # pragma clang diagnostic ignored "-Wdeprecated-declarations"
73
- #endif
74
-
75
- static std::wstring utf8_to_utf16(const std::string & str) {
76
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
77
- return converter.from_bytes(str);
78
- }
79
-
80
- static std::string utf16_to_utf8(const std::wstring & str) {
81
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
82
- return converter.to_bytes(str);
83
- }
84
-
85
- #if defined(__clang__)
86
- # pragma clang diagnostic pop
87
- #endif
88
-
89
- #ifdef _WIN32
90
-
91
- using dl_handle = std::remove_pointer_t<HMODULE>;
92
-
93
- struct dl_handle_deleter {
94
- void operator()(HMODULE handle) {
95
- FreeLibrary(handle);
96
- }
97
- };
98
-
99
- static dl_handle * dl_load_library(const std::wstring & path) {
100
- // suppress error dialogs for missing DLLs
101
- DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
102
- SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
103
-
104
- HMODULE handle = LoadLibraryW(path.c_str());
105
-
106
- SetErrorMode(old_mode);
107
-
108
- return handle;
109
- }
110
-
111
- static void * dl_get_sym(dl_handle * handle, const char * name) {
112
- DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
113
- SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
114
-
115
- void * p = (void *) GetProcAddress(handle, name);
116
-
117
- SetErrorMode(old_mode);
118
-
119
- return p;
120
- }
121
-
122
- #else
123
-
124
- using dl_handle = void;
125
-
126
- struct dl_handle_deleter {
127
- void operator()(void * handle) {
128
- dlclose(handle);
129
- }
130
- };
131
-
132
- static void * dl_load_library(const std::wstring & path) {
133
- dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
134
-
135
- return handle;
136
- }
137
-
138
- static void * dl_get_sym(dl_handle * handle, const char * name) {
139
- return dlsym(handle, name);
140
- }
141
-
142
- #endif
143
-
144
- using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
145
-
146
- struct lm_ggml_backend_reg_entry {
147
- lm_ggml_backend_reg_t reg;
148
- dl_handle_ptr handle;
149
- };
150
-
151
- struct lm_ggml_backend_registry {
152
- std::vector<lm_ggml_backend_reg_entry> backends;
153
- std::vector<lm_ggml_backend_dev_t> devices;
154
-
155
- lm_ggml_backend_registry() {
156
- #ifdef LM_GGML_USE_CUDA
157
- register_backend(lm_ggml_backend_cuda_reg());
158
- #endif
159
- #ifdef LM_GGML_USE_METAL
160
- register_backend(lm_ggml_backend_metal_reg());
161
- #endif
162
- #ifdef LM_GGML_USE_SYCL
163
- register_backend(lm_ggml_backend_sycl_reg());
164
- #endif
165
- #ifdef LM_GGML_USE_VULKAN
166
- register_backend(lm_ggml_backend_vk_reg());
167
- #endif
168
- #ifdef LM_GGML_USE_OPENCL
169
- register_backend(lm_ggml_backend_opencl_reg());
170
- #endif
171
- #ifdef LM_GGML_USE_CANN
172
- register_backend(lm_ggml_backend_cann_reg());
173
- #endif
174
- #ifdef LM_GGML_USE_BLAS
175
- register_backend(lm_ggml_backend_blas_reg());
176
- #endif
177
- #ifdef LM_GGML_USE_RPC
178
- register_backend(lm_ggml_backend_rpc_reg());
179
- #endif
180
- #ifdef LM_GGML_USE_KOMPUTE
181
- register_backend(lm_ggml_backend_kompute_reg());
182
- #endif
183
- #ifdef LM_GGML_USE_CPU
184
- register_backend(lm_ggml_backend_cpu_reg());
185
- #endif
186
- }
187
-
188
- ~lm_ggml_backend_registry() {
189
- // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
190
- // since backend threads may still be running and accessing resources from the dynamic library
191
- for (auto & entry : backends) {
192
- if (entry.handle) {
193
- entry.handle.release(); // NOLINT
194
- }
195
- }
196
- }
197
-
198
- void register_backend(lm_ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
199
- if (!reg) {
200
- return;
201
- }
202
-
203
- #ifndef NDEBUG
204
- LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
205
- __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
206
- #endif
207
- backends.push_back({ reg, std::move(handle) });
208
- for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
209
- register_device(lm_ggml_backend_reg_dev_get(reg, i));
210
- }
211
- }
212
-
213
- void register_device(lm_ggml_backend_dev_t device) {
214
- #ifndef NDEBUG
215
- LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
216
- #endif
217
- devices.push_back(device);
218
- }
219
-
220
- lm_ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
221
- dl_handle_ptr handle { dl_load_library(path) };
222
- if (!handle) {
223
- if (!silent) {
224
- LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
225
- }
226
- return nullptr;
227
- }
228
-
229
- auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
230
- if (score_fn && score_fn() == 0) {
231
- if (!silent) {
232
- LM_GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
233
- }
234
- return nullptr;
235
- }
236
-
237
- auto backend_init_fn = (lm_ggml_backend_init_t) dl_get_sym(handle.get(), "lm_ggml_backend_init");
238
- if (!backend_init_fn) {
239
- if (!silent) {
240
- LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
241
- }
242
- return nullptr;
243
- }
244
-
245
- lm_ggml_backend_reg_t reg = backend_init_fn();
246
- if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
247
- if (!silent) {
248
- if (!reg) {
249
- LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
250
- } else {
251
- LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
252
- __func__, utf16_to_utf8(path).c_str(), reg->api_version, LM_GGML_BACKEND_API_VERSION);
253
- }
254
- }
255
- return nullptr;
256
- }
257
-
258
- LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
259
-
260
- register_backend(reg, std::move(handle));
261
-
262
- return reg;
263
- }
264
-
265
- void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
266
- auto it = std::find_if(backends.begin(), backends.end(),
267
- [reg](const lm_ggml_backend_reg_entry & entry) { return entry.reg == reg; });
268
-
269
- if (it == backends.end()) {
270
- if (!silent) {
271
- LM_GGML_LOG_ERROR("%s: backend not found\n", __func__);
272
- }
273
- return;
274
- }
275
-
276
- if (!silent) {
277
- LM_GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, lm_ggml_backend_reg_name(reg));
278
- }
279
-
280
- // remove devices
281
- devices.erase(
282
- std::remove_if(devices.begin(), devices.end(),
283
- [reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; }),
284
- devices.end());
285
-
286
- // remove backend
287
- backends.erase(it);
288
- }
289
- };
290
-
291
- static lm_ggml_backend_registry & get_reg() {
292
- static lm_ggml_backend_registry reg;
293
- return reg;
294
- }
295
-
296
- // Internal API
297
- void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
298
- get_reg().register_backend(reg);
299
- }
300
-
301
- void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
302
- get_reg().register_device(device);
303
- }
304
-
305
- // Backend (reg) enumeration
306
- static bool striequals(const char * a, const char * b) {
307
- for (; *a && *b; a++, b++) {
308
- if (std::tolower(*a) != std::tolower(*b)) {
309
- return false;
310
- }
311
- }
312
- return *a == *b;
313
- }
314
-
315
- size_t lm_ggml_backend_reg_count() {
316
- return get_reg().backends.size();
317
- }
318
-
319
- lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
320
- LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
321
- return get_reg().backends[index].reg;
322
- }
323
-
324
- lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
325
- for (size_t i = 0; i < lm_ggml_backend_reg_count(); i++) {
326
- lm_ggml_backend_reg_t reg = lm_ggml_backend_reg_get(i);
327
- if (striequals(lm_ggml_backend_reg_name(reg), name)) {
328
- return reg;
329
- }
330
- }
331
- return nullptr;
332
- }
333
-
334
- // Device enumeration
335
- size_t lm_ggml_backend_dev_count() {
336
- return get_reg().devices.size();
337
- }
338
-
339
- lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
340
- LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
341
- return get_reg().devices[index];
342
- }
343
-
344
- lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
345
- for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
346
- lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
347
- if (striequals(lm_ggml_backend_dev_name(dev), name)) {
348
- return dev;
349
- }
350
- }
351
- return nullptr;
352
- }
353
-
354
- lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
355
- for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
356
- lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
357
- if (lm_ggml_backend_dev_type(dev) == type) {
358
- return dev;
359
- }
360
- }
361
- return nullptr;
362
- }
363
-
364
- // Convenience functions
365
- lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
366
- lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
367
- if (!dev) {
368
- return nullptr;
369
- }
370
- return lm_ggml_backend_dev_init(dev, params);
371
- }
372
-
373
- lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
374
- lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
375
- if (!dev) {
376
- return nullptr;
377
- }
378
- return lm_ggml_backend_dev_init(dev, params);
379
- }
380
-
381
- lm_ggml_backend_t lm_ggml_backend_init_best(void) {
382
- lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
383
- if (!dev) {
384
- dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
385
- }
386
- if (!dev) {
387
- return nullptr;
388
- }
389
- return lm_ggml_backend_dev_init(dev, nullptr);
390
- }
391
-
392
- // Dynamic loading
393
- lm_ggml_backend_reg_t lm_ggml_backend_load(const char * path) {
394
- return get_reg().load_backend(utf8_to_utf16(path), false);
395
- }
396
-
397
- void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
398
- get_reg().unload_backend(reg, true);
399
- }
400
-
401
- static std::wstring get_executable_path() {
402
- #if defined(__APPLE__)
403
- // get executable path
404
- std::vector<char> path;
405
- uint32_t size;
406
- while (true) {
407
- size = path.size();
408
- if (_NSGetExecutablePath(path.data(), &size) == 0) {
409
- break;
410
- }
411
- path.resize(size);
412
- }
413
- std::string base_path(path.data(), size);
414
- // remove executable name
415
- auto last_slash = base_path.find_last_of('/');
416
- if (last_slash != std::string::npos) {
417
- base_path = base_path.substr(0, last_slash);
418
- }
419
- return utf8_to_utf16(base_path + "/");
420
- #elif defined(__linux__) || defined(__FreeBSD__)
421
- std::string base_path = ".";
422
- std::vector<char> path(1024);
423
- while (true) {
424
- // get executable path
425
- # if defined(__linux__)
426
- ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
427
- # elif defined(__FreeBSD__)
428
- ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
429
- # endif
430
- if (len == -1) {
431
- break;
432
- }
433
- if (len < (ssize_t) path.size()) {
434
- base_path = std::string(path.data(), len);
435
- // remove executable name
436
- auto last_slash = base_path.find_last_of('/');
437
- if (last_slash != std::string::npos) {
438
- base_path = base_path.substr(0, last_slash);
439
- }
440
- break;
441
- }
442
- path.resize(path.size() * 2);
443
- }
444
-
445
- return utf8_to_utf16(base_path + "/");
446
- #elif defined(_WIN32)
447
- std::vector<wchar_t> path(MAX_PATH);
448
- DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
449
- if (len == 0) {
450
- return {};
451
- }
452
- std::wstring base_path(path.data(), len);
453
- // remove executable name
454
- auto last_slash = base_path.find_last_of('\\');
455
- if (last_slash != std::string::npos) {
456
- base_path = base_path.substr(0, last_slash);
457
- }
458
- return base_path + L"\\";
459
- #else
460
- return {};
461
- #endif
462
- }
463
-
464
- static std::wstring backend_filename_prefix() {
465
- #ifdef _WIN32
466
- return L"ggml-";
467
- #else
468
- return L"libggml-";
469
- #endif
470
- }
471
-
472
- static std::wstring backend_filename_suffix() {
473
- #ifdef _WIN32
474
- return L".dll";
475
- #else
476
- return L".so";
477
- #endif
478
- }
479
-
480
- static std::wstring path_separator() {
481
- #ifdef _WIN32
482
- return L"\\";
483
- #else
484
- return L"/";
485
- #endif
486
- }
487
-
488
- static lm_ggml_backend_reg_t lm_ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
489
- // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
490
- // TODO: search system paths
491
- std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
492
- std::vector<std::wstring> search_paths;
493
- if (user_search_path == nullptr) {
494
- search_paths.push_back(L"." + path_separator());
495
- search_paths.push_back(get_executable_path());
496
- } else {
497
- search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
498
- }
499
-
500
- int best_score = 0;
501
- std::wstring best_path;
502
-
503
- namespace fs = std::filesystem;
504
- for (const auto & search_path : search_paths) {
505
- if (!fs::exists(search_path)) {
506
- continue;
507
- }
508
- fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
509
- for (const auto & entry : dir_it) {
510
- if (entry.is_regular_file()) {
511
- std::wstring filename = entry.path().filename().wstring();
512
- std::wstring ext = entry.path().extension().wstring();
513
- if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
514
- dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
515
- if (!handle && !silent) {
516
- LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
517
- }
518
- if (handle) {
519
- auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
520
- if (score_fn) {
521
- int s = score_fn();
522
- #ifndef NDEBUG
523
- LM_GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
524
- #endif
525
- if (s > best_score) {
526
- best_score = s;
527
- best_path = entry.path().wstring();
528
- }
529
- } else {
530
- if (!silent) {
531
- LM_GGML_LOG_INFO("%s: failed to find lm_ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
532
- }
533
- }
534
- }
535
- }
536
- }
537
- }
538
- }
539
-
540
- if (best_score == 0) {
541
- // try to load the base backend
542
- for (const auto & search_path : search_paths) {
543
- std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
544
- if (fs::exists(path)) {
545
- return get_reg().load_backend(path, silent);
546
- }
547
- }
548
- return nullptr;
549
- }
550
-
551
- return get_reg().load_backend(best_path, silent);
552
- }
553
-
554
- void lm_ggml_backend_load_all() {
555
- lm_ggml_backend_load_all_from_path(nullptr);
556
- }
557
-
558
- void lm_ggml_backend_load_all_from_path(const char * dir_path) {
559
- #ifdef NDEBUG
560
- bool silent = true;
561
- #else
562
- bool silent = false;
563
- #endif
564
-
565
- lm_ggml_backend_load_best("blas", silent, dir_path);
566
- lm_ggml_backend_load_best("cann", silent, dir_path);
567
- lm_ggml_backend_load_best("cuda", silent, dir_path);
568
- lm_ggml_backend_load_best("hip", silent, dir_path);
569
- lm_ggml_backend_load_best("kompute", silent, dir_path);
570
- lm_ggml_backend_load_best("metal", silent, dir_path);
571
- lm_ggml_backend_load_best("rpc", silent, dir_path);
572
- lm_ggml_backend_load_best("sycl", silent, dir_path);
573
- lm_ggml_backend_load_best("vulkan", silent, dir_path);
574
- lm_ggml_backend_load_best("opencl", silent, dir_path);
575
- lm_ggml_backend_load_best("musa", silent, dir_path);
576
- lm_ggml_backend_load_best("cpu", silent, dir_path);
577
- // check the environment variable LM_GGML_BACKEND_PATH to load an out-of-tree backend
578
- const char * backend_path = std::getenv("LM_GGML_BACKEND_PATH");
579
- if (backend_path) {
580
- lm_ggml_backend_load(backend_path);
581
- }
582
- }
1
+ #include "ggml-backend-impl.h"
2
+ #include "ggml-backend.h"
3
+ #include "ggml-impl.h"
4
+ #include <algorithm>
5
+ #include <cstring>
6
+ #include <filesystem>
7
+ #include <memory>
8
+ #include <string>
9
+ #include <type_traits>
10
+ #include <vector>
11
+ #include <cctype>
12
+
13
+ #ifdef _WIN32
14
+ # define WIN32_LEAN_AND_MEAN
15
+ # ifndef NOMINMAX
16
+ # define NOMINMAX
17
+ # endif
18
+ # include <windows.h>
19
+ #elif defined(__APPLE__)
20
+ # include <mach-o/dyld.h>
21
+ # include <dlfcn.h>
22
+ #else
23
+ # include <dlfcn.h>
24
+ # include <unistd.h>
25
+ #endif
26
+
27
+ // Backend registry
28
+ #ifdef LM_GGML_USE_CPU
29
+ #include "ggml-cpu.h"
30
+ #endif
31
+
32
+ #ifdef LM_GGML_USE_CUDA
33
+ #include "ggml-cuda.h"
34
+ #endif
35
+
36
+ #ifdef LM_GGML_USE_METAL
37
+ #include "ggml-metal.h"
38
+ #endif
39
+
40
+ #ifdef LM_GGML_USE_SYCL
41
+ #include "ggml-sycl.h"
42
+ #endif
43
+
44
+ #ifdef LM_GGML_USE_VULKAN
45
+ #include "ggml-vulkan.h"
46
+ #endif
47
+
48
+ #ifdef LM_GGML_USE_OPENCL
49
+ #include "ggml-opencl.h"
50
+ #endif
51
+
52
+ #ifdef LM_GGML_USE_BLAS
53
+ #include "ggml-blas.h"
54
+ #endif
55
+
56
+ #ifdef LM_GGML_USE_RPC
57
+ #include "ggml-rpc.h"
58
+ #endif
59
+
60
+ #ifdef LM_GGML_USE_CANN
61
+ #include "ggml-cann.h"
62
+ #endif
63
+
64
+ #ifdef LM_GGML_USE_KOMPUTE
65
+ #include "ggml-kompute.h"
66
+ #endif
67
+
68
+ // disable C++17 deprecation warning for std::codecvt_utf8
69
+ #if defined(__clang__)
70
+ # pragma clang diagnostic push
71
+ # pragma clang diagnostic ignored "-Wdeprecated-declarations"
72
+ #endif
73
+
74
+ namespace fs = std::filesystem;
75
+
76
+ static std::string path_str(const fs::path & path) {
77
+ std::string u8path;
78
+ try {
79
+ #if defined(__cpp_lib_char8_t)
80
+ // C++20 and later: u8string() returns std::u8string
81
+ std::u8string u8str = path.u8string();
82
+ u8path = std::string(reinterpret_cast<const char*>(u8str.c_str()));
83
+ #else
84
+ // C++17: u8string() returns std::string
85
+ u8path = path.u8string();
86
+ #endif
87
+ } catch (...) {
88
+ }
89
+ return u8path;
90
+ }
91
+
92
+ #if defined(__clang__)
93
+ # pragma clang diagnostic pop
94
+ #endif
95
+
96
+ #ifdef _WIN32
97
+
98
+ using dl_handle = std::remove_pointer_t<HMODULE>;
99
+
100
+ struct dl_handle_deleter {
101
+ void operator()(HMODULE handle) {
102
+ FreeLibrary(handle);
103
+ }
104
+ };
105
+
106
+ static dl_handle * dl_load_library(const fs::path & path) {
107
+ // suppress error dialogs for missing DLLs
108
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
109
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
110
+
111
+ HMODULE handle = LoadLibraryW(path.wstring().c_str());
112
+
113
+ SetErrorMode(old_mode);
114
+
115
+ return handle;
116
+ }
117
+
118
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
119
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
120
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
121
+
122
+ void * p = (void *) GetProcAddress(handle, name);
123
+
124
+ SetErrorMode(old_mode);
125
+
126
+ return p;
127
+ }
128
+
129
+ #else
130
+
131
+ using dl_handle = void;
132
+
133
+ struct dl_handle_deleter {
134
+ void operator()(void * handle) {
135
+ dlclose(handle);
136
+ }
137
+ };
138
+
139
+ static void * dl_load_library(const fs::path & path) {
140
+ dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
141
+
142
+ return handle;
143
+ }
144
+
145
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
146
+ return dlsym(handle, name);
147
+ }
148
+
149
+ #endif
150
+
151
+ using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
152
+
153
+ struct lm_ggml_backend_reg_entry {
154
+ lm_ggml_backend_reg_t reg;
155
+ dl_handle_ptr handle;
156
+ };
157
+
158
+ struct lm_ggml_backend_registry {
159
+ std::vector<lm_ggml_backend_reg_entry> backends;
160
+ std::vector<lm_ggml_backend_dev_t> devices;
161
+
162
+ lm_ggml_backend_registry() {
163
+ #ifdef LM_GGML_USE_CUDA
164
+ register_backend(lm_ggml_backend_cuda_reg());
165
+ #endif
166
+ #ifdef LM_GGML_USE_METAL
167
+ register_backend(lm_ggml_backend_metal_reg());
168
+ #endif
169
+ #ifdef LM_GGML_USE_SYCL
170
+ register_backend(lm_ggml_backend_sycl_reg());
171
+ #endif
172
+ #ifdef LM_GGML_USE_VULKAN
173
+ register_backend(lm_ggml_backend_vk_reg());
174
+ #endif
175
+ #ifdef LM_GGML_USE_OPENCL
176
+ register_backend(lm_ggml_backend_opencl_reg());
177
+ #endif
178
+ #ifdef LM_GGML_USE_CANN
179
+ register_backend(lm_ggml_backend_cann_reg());
180
+ #endif
181
+ #ifdef LM_GGML_USE_BLAS
182
+ register_backend(lm_ggml_backend_blas_reg());
183
+ #endif
184
+ #ifdef LM_GGML_USE_RPC
185
+ register_backend(lm_ggml_backend_rpc_reg());
186
+ #endif
187
+ #ifdef LM_GGML_USE_KOMPUTE
188
+ register_backend(lm_ggml_backend_kompute_reg());
189
+ #endif
190
+ #ifdef LM_GGML_USE_CPU
191
+ register_backend(lm_ggml_backend_cpu_reg());
192
+ #endif
193
+ }
194
+
195
+ ~lm_ggml_backend_registry() {
196
+ // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
197
+ // since backend threads may still be running and accessing resources from the dynamic library
198
+ for (auto & entry : backends) {
199
+ if (entry.handle) {
200
+ entry.handle.release(); // NOLINT
201
+ }
202
+ }
203
+ }
204
+
205
+ void register_backend(lm_ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
206
+ if (!reg) {
207
+ return;
208
+ }
209
+
210
+ #ifndef NDEBUG
211
+ LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
212
+ __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
213
+ #endif
214
+ backends.push_back({ reg, std::move(handle) });
215
+ for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
216
+ register_device(lm_ggml_backend_reg_dev_get(reg, i));
217
+ }
218
+ }
219
+
220
+ void register_device(lm_ggml_backend_dev_t device) {
221
+ #ifndef NDEBUG
222
+ LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
223
+ #endif
224
+ devices.push_back(device);
225
+ }
226
+
227
+ lm_ggml_backend_reg_t load_backend(const fs::path & path, bool silent) {
228
+ dl_handle_ptr handle { dl_load_library(path) };
229
+ if (!handle) {
230
+ if (!silent) {
231
+ LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(path).c_str());
232
+ }
233
+ return nullptr;
234
+ }
235
+
236
+ auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
237
+ if (score_fn && score_fn() == 0) {
238
+ if (!silent) {
239
+ LM_GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str());
240
+ }
241
+ return nullptr;
242
+ }
243
+
244
+ auto backend_init_fn = (lm_ggml_backend_init_t) dl_get_sym(handle.get(), "lm_ggml_backend_init");
245
+ if (!backend_init_fn) {
246
+ if (!silent) {
247
+ LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s\n", __func__, path_str(path).c_str());
248
+ }
249
+ return nullptr;
250
+ }
251
+
252
+ lm_ggml_backend_reg_t reg = backend_init_fn();
253
+ if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
254
+ if (!silent) {
255
+ if (!reg) {
256
+ LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n",
257
+ __func__, path_str(path).c_str());
258
+ } else {
259
+ LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
260
+ __func__, path_str(path).c_str(), reg->api_version, LM_GGML_BACKEND_API_VERSION);
261
+ }
262
+ }
263
+ return nullptr;
264
+ }
265
+
266
+ LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), path_str(path).c_str());
267
+
268
+ register_backend(reg, std::move(handle));
269
+
270
+ return reg;
271
+ }
272
+
273
+ void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
274
+ auto it = std::find_if(backends.begin(), backends.end(),
275
+ [reg](const lm_ggml_backend_reg_entry & entry) { return entry.reg == reg; });
276
+
277
+ if (it == backends.end()) {
278
+ if (!silent) {
279
+ LM_GGML_LOG_ERROR("%s: backend not found\n", __func__);
280
+ }
281
+ return;
282
+ }
283
+
284
+ if (!silent) {
285
+ LM_GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, lm_ggml_backend_reg_name(reg));
286
+ }
287
+
288
+ // remove devices
289
+ devices.erase(
290
+ std::remove_if(devices.begin(), devices.end(),
291
+ [reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; }),
292
+ devices.end());
293
+
294
+ // remove backend
295
+ backends.erase(it);
296
+ }
297
+ };
298
+
299
+ static lm_ggml_backend_registry & get_reg() {
300
+ static lm_ggml_backend_registry reg;
301
+ return reg;
302
+ }
303
+
304
+ // Internal API
305
+ void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
306
+ get_reg().register_backend(reg);
307
+ }
308
+
309
+ void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
310
+ get_reg().register_device(device);
311
+ }
312
+
313
+ // Backend (reg) enumeration
314
+ static bool striequals(const char * a, const char * b) {
315
+ for (; *a && *b; a++, b++) {
316
+ if (std::tolower(*a) != std::tolower(*b)) {
317
+ return false;
318
+ }
319
+ }
320
+ return *a == *b;
321
+ }
322
+
323
+ size_t lm_ggml_backend_reg_count() {
324
+ return get_reg().backends.size();
325
+ }
326
+
327
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
328
+ LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
329
+ return get_reg().backends[index].reg;
330
+ }
331
+
332
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
333
+ for (size_t i = 0; i < lm_ggml_backend_reg_count(); i++) {
334
+ lm_ggml_backend_reg_t reg = lm_ggml_backend_reg_get(i);
335
+ if (striequals(lm_ggml_backend_reg_name(reg), name)) {
336
+ return reg;
337
+ }
338
+ }
339
+ return nullptr;
340
+ }
341
+
342
+ // Device enumeration
343
+ size_t lm_ggml_backend_dev_count() {
344
+ return get_reg().devices.size();
345
+ }
346
+
347
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
348
+ LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
349
+ return get_reg().devices[index];
350
+ }
351
+
352
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
353
+ for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
354
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
355
+ if (striequals(lm_ggml_backend_dev_name(dev), name)) {
356
+ return dev;
357
+ }
358
+ }
359
+ return nullptr;
360
+ }
361
+
362
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
363
+ for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
364
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
365
+ if (lm_ggml_backend_dev_type(dev) == type) {
366
+ return dev;
367
+ }
368
+ }
369
+ return nullptr;
370
+ }
371
+
372
+ // Convenience functions
373
+ lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
374
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
375
+ if (!dev) {
376
+ return nullptr;
377
+ }
378
+ return lm_ggml_backend_dev_init(dev, params);
379
+ }
380
+
381
+ lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
382
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
383
+ if (!dev) {
384
+ return nullptr;
385
+ }
386
+ return lm_ggml_backend_dev_init(dev, params);
387
+ }
388
+
389
+ lm_ggml_backend_t lm_ggml_backend_init_best(void) {
390
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
391
+ if (!dev) {
392
+ dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
393
+ }
394
+ if (!dev) {
395
+ return nullptr;
396
+ }
397
+ return lm_ggml_backend_dev_init(dev, nullptr);
398
+ }
399
+
400
+ // Dynamic loading
401
+ lm_ggml_backend_reg_t lm_ggml_backend_load(const char * path) {
402
+ return get_reg().load_backend(path, false);
403
+ }
404
+
405
+ void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
406
+ get_reg().unload_backend(reg, true);
407
+ }
408
+
409
+ static fs::path get_executable_path() {
410
+ #if defined(__APPLE__)
411
+ // get executable path
412
+ std::vector<char> path;
413
+ uint32_t size;
414
+ while (true) {
415
+ size = path.size();
416
+ if (_NSGetExecutablePath(path.data(), &size) == 0) {
417
+ break;
418
+ }
419
+ path.resize(size);
420
+ }
421
+ std::string base_path(path.data(), size);
422
+ // remove executable name
423
+ auto last_slash = base_path.find_last_of('/');
424
+ if (last_slash != std::string::npos) {
425
+ base_path = base_path.substr(0, last_slash);
426
+ }
427
+ return base_path + "/";
428
+ #elif defined(__linux__) || defined(__FreeBSD__)
429
+ std::string base_path = ".";
430
+ std::vector<char> path(1024);
431
+ while (true) {
432
+ // get executable path
433
+ # if defined(__linux__)
434
+ ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
435
+ # elif defined(__FreeBSD__)
436
+ ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
437
+ # endif
438
+ if (len == -1) {
439
+ break;
440
+ }
441
+ if (len < (ssize_t) path.size()) {
442
+ base_path = std::string(path.data(), len);
443
+ // remove executable name
444
+ auto last_slash = base_path.find_last_of('/');
445
+ if (last_slash != std::string::npos) {
446
+ base_path = base_path.substr(0, last_slash);
447
+ }
448
+ break;
449
+ }
450
+ path.resize(path.size() * 2);
451
+ }
452
+
453
+ return base_path + "/";
454
+ #elif defined(_WIN32)
455
+ std::vector<wchar_t> path(MAX_PATH);
456
+ DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
457
+ if (len == 0) {
458
+ return {};
459
+ }
460
+ std::wstring base_path(path.data(), len);
461
+ // remove executable name
462
+ auto last_slash = base_path.find_last_of('\\');
463
+ if (last_slash != std::string::npos) {
464
+ base_path = base_path.substr(0, last_slash);
465
+ }
466
+ return base_path + L"\\";
467
+ #else
468
+ return {};
469
+ #endif
470
+ }
471
+
472
+ static fs::path backend_filename_prefix() {
473
+ #ifdef _WIN32
474
+ return fs::u8path("ggml-");
475
+ #else
476
+ return fs::u8path("libggml-");
477
+ #endif
478
+ }
479
+
480
+ static fs::path backend_filename_extension() {
481
+ #ifdef _WIN32
482
+ return fs::u8path(".dll");
483
+ #else
484
+ return fs::u8path(".so");
485
+ #endif
486
+ }
487
+
488
+ static lm_ggml_backend_reg_t lm_ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
489
+ // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
490
+ const fs::path name_path = fs::u8path(name);
491
+ const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
492
+ const fs::path file_extension = backend_filename_extension();
493
+
494
+ std::vector<fs::path> search_paths;
495
+ if (user_search_path == nullptr) {
496
+ // default search paths: executable directory, current directory
497
+ search_paths.push_back(get_executable_path());
498
+ search_paths.push_back(fs::current_path());
499
+ } else {
500
+ search_paths.push_back(fs::u8path(user_search_path));
501
+ }
502
+
503
+ int best_score = 0;
504
+ fs::path best_path;
505
+
506
+ for (const auto & search_path : search_paths) {
507
+ if (!fs::exists(search_path)) {
508
+ LM_GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
509
+ continue;
510
+ }
511
+ fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
512
+ for (const auto & entry : dir_it) {
513
+ if (entry.is_regular_file()) {
514
+ auto filename = entry.path().filename();
515
+ auto ext = entry.path().extension();
516
+ if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
517
+ dl_handle_ptr handle { dl_load_library(entry) };
518
+ if (!handle && !silent) {
519
+ LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(entry.path()).c_str());
520
+ }
521
+ if (handle) {
522
+ auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
523
+ if (score_fn) {
524
+ int s = score_fn();
525
+ #ifndef NDEBUG
526
+ LM_GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s);
527
+ #endif
528
+ if (s > best_score) {
529
+ best_score = s;
530
+ best_path = entry.path();
531
+ }
532
+ } else {
533
+ if (!silent) {
534
+ LM_GGML_LOG_INFO("%s: failed to find lm_ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str());
535
+ }
536
+ }
537
+ }
538
+ }
539
+ }
540
+ }
541
+ }
542
+
543
+ if (best_score == 0) {
544
+ // try to load the base backend
545
+ for (const auto & search_path : search_paths) {
546
+ fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
547
+ fs::path path = search_path / filename;
548
+ if (fs::exists(path)) {
549
+ return get_reg().load_backend(path, silent);
550
+ }
551
+ }
552
+ return nullptr;
553
+ }
554
+
555
+ return get_reg().load_backend(best_path, silent);
556
+ }
557
+
558
+ void lm_ggml_backend_load_all() {
559
+ lm_ggml_backend_load_all_from_path(nullptr);
560
+ }
561
+
562
+ void lm_ggml_backend_load_all_from_path(const char * dir_path) {
563
+ #ifdef NDEBUG
564
+ bool silent = true;
565
+ #else
566
+ bool silent = false;
567
+ #endif
568
+
569
+ lm_ggml_backend_load_best("blas", silent, dir_path);
570
+ lm_ggml_backend_load_best("cann", silent, dir_path);
571
+ lm_ggml_backend_load_best("cuda", silent, dir_path);
572
+ lm_ggml_backend_load_best("hip", silent, dir_path);
573
+ lm_ggml_backend_load_best("kompute", silent, dir_path);
574
+ lm_ggml_backend_load_best("metal", silent, dir_path);
575
+ lm_ggml_backend_load_best("rpc", silent, dir_path);
576
+ lm_ggml_backend_load_best("sycl", silent, dir_path);
577
+ lm_ggml_backend_load_best("vulkan", silent, dir_path);
578
+ lm_ggml_backend_load_best("opencl", silent, dir_path);
579
+ lm_ggml_backend_load_best("musa", silent, dir_path);
580
+ lm_ggml_backend_load_best("cpu", silent, dir_path);
581
+ // check the environment variable LM_GGML_BACKEND_PATH to load an out-of-tree backend
582
+ const char * backend_path = std::getenv("LM_GGML_BACKEND_PATH");
583
+ if (backend_path) {
584
+ lm_ggml_backend_load(backend_path);
585
+ }
586
+ }
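For reference, a minimal usage sketch of the dynamic-loading API implemented in the ggml-backend-reg.cpp shown above. This is not code from the package: it assumes the lm_-prefixed declarations (lm_ggml_backend_load_all_from_path, lm_ggml_backend_dev_count, lm_ggml_backend_dev_get, lm_ggml_backend_dev_name, lm_ggml_backend_dev_description, lm_ggml_backend_init_best) are visible through "ggml-backend.h", and that lm_ggml_backend_free follows the same prefix convention as the functions in the diff.

// Minimal usage sketch (not part of the package): drives the dynamic backend
// registry implemented in ggml-backend-reg.cpp.
#include "ggml-backend.h"
#include <cstdio>

int main() {
    // scan the default search paths for [lib]ggml-<name>-*.[so|dll] backends;
    // passing a directory string instead of nullptr restricts the search to it
    lm_ggml_backend_load_all_from_path(nullptr);

    // list every device contributed by the registered backends
    for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
        lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
        std::printf("device %zu: %s (%s)\n", i,
                    lm_ggml_backend_dev_name(dev),
                    lm_ggml_backend_dev_description(dev));
    }

    // prefer a GPU device, falling back to CPU
    lm_ggml_backend_t backend = lm_ggml_backend_init_best();
    if (backend == nullptr) {
        std::printf("no usable backend found\n");
        return 1;
    }
    lm_ggml_backend_free(backend); // assumed lm_-prefixed counterpart of ggml_backend_free
    return 0;
}

As the end of the diff shows, setting the LM_GGML_BACKEND_PATH environment variable makes lm_ggml_backend_load_all additionally load one out-of-tree backend from that path.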