cui-llama.rn 1.4.4 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. package/android/src/main/CMakeLists.txt +9 -2
  2. package/android/src/main/jni.cpp +54 -34
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/binary-ops.cpp +158 -0
  12. package/cpp/binary-ops.h +16 -0
  13. package/cpp/chat.cpp +1769 -1085
  14. package/cpp/chat.h +143 -0
  15. package/cpp/common.cpp +1562 -1996
  16. package/cpp/common.h +677 -744
  17. package/cpp/cpu-common.h +72 -0
  18. package/cpp/ggml-alloc.c +1039 -1030
  19. package/cpp/ggml-alloc.h +1 -1
  20. package/cpp/ggml-backend-impl.h +255 -255
  21. package/cpp/ggml-backend-reg.cpp +586 -582
  22. package/cpp/ggml-backend.cpp +2004 -2002
  23. package/cpp/ggml-backend.h +354 -354
  24. package/cpp/ggml-common.h +1857 -1851
  25. package/cpp/ggml-cpp.h +39 -39
  26. package/cpp/ggml-cpu-aarch64.cpp +5725 -4247
  27. package/cpp/ggml-cpu-aarch64.h +8 -8
  28. package/cpp/ggml-cpu-impl.h +512 -380
  29. package/cpp/ggml-cpu-quants.c +13026 -11517
  30. package/cpp/ggml-cpu-traits.cpp +36 -36
  31. package/cpp/ggml-cpu-traits.h +38 -38
  32. package/cpp/ggml-cpu.c +3438 -14485
  33. package/cpp/ggml-cpu.cpp +655 -633
  34. package/cpp/ggml-cpu.h +138 -135
  35. package/cpp/ggml-impl.h +594 -567
  36. package/cpp/ggml-metal-impl.h +312 -3
  37. package/cpp/ggml-metal.h +66 -66
  38. package/cpp/ggml-metal.m +5360 -5002
  39. package/cpp/ggml-opt.cpp +854 -854
  40. package/cpp/ggml-opt.h +216 -216
  41. package/cpp/ggml-quants.c +5238 -5238
  42. package/cpp/ggml-threading.h +14 -14
  43. package/cpp/ggml.c +6618 -6524
  44. package/cpp/ggml.h +2222 -2194
  45. package/cpp/gguf.cpp +1330 -1329
  46. package/cpp/gguf.h +202 -202
  47. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  48. package/cpp/json-schema-to-grammar.h +21 -22
  49. package/cpp/json.hpp +24766 -24766
  50. package/cpp/llama-adapter.cpp +382 -347
  51. package/cpp/llama-adapter.h +76 -74
  52. package/cpp/llama-arch.cpp +1714 -1492
  53. package/cpp/llama-arch.h +428 -402
  54. package/cpp/llama-batch.cpp +368 -368
  55. package/cpp/llama-batch.h +88 -88
  56. package/cpp/llama-chat.cpp +640 -587
  57. package/cpp/llama-chat.h +56 -53
  58. package/cpp/llama-context.cpp +2831 -1775
  59. package/cpp/llama-context.h +265 -128
  60. package/cpp/llama-cparams.cpp +1 -1
  61. package/cpp/llama-cparams.h +38 -37
  62. package/cpp/llama-cpp.h +30 -30
  63. package/cpp/llama-grammar.cpp +1219 -1219
  64. package/cpp/llama-grammar.h +173 -164
  65. package/cpp/llama-graph.cpp +1695 -0
  66. package/cpp/llama-graph.h +592 -0
  67. package/cpp/llama-hparams.cpp +79 -71
  68. package/cpp/llama-hparams.h +156 -139
  69. package/cpp/llama-impl.cpp +167 -167
  70. package/cpp/llama-impl.h +61 -61
  71. package/cpp/llama-io.cpp +15 -0
  72. package/cpp/llama-io.h +35 -0
  73. package/cpp/llama-kv-cache.cpp +1380 -718
  74. package/cpp/llama-kv-cache.h +213 -218
  75. package/cpp/llama-memory.cpp +1 -0
  76. package/cpp/llama-memory.h +21 -0
  77. package/cpp/llama-mmap.cpp +600 -590
  78. package/cpp/llama-mmap.h +68 -68
  79. package/cpp/llama-model-loader.cpp +1129 -1124
  80. package/cpp/llama-model-loader.h +169 -167
  81. package/cpp/llama-model.cpp +13080 -4023
  82. package/cpp/llama-model.h +409 -370
  83. package/cpp/llama-sampling.cpp +2563 -2525
  84. package/cpp/llama-sampling.h +32 -32
  85. package/cpp/llama-vocab.cpp +3295 -3252
  86. package/cpp/llama-vocab.h +125 -125
  87. package/cpp/llama.cpp +351 -10137
  88. package/cpp/llama.h +1434 -1340
  89. package/cpp/log.cpp +427 -423
  90. package/cpp/log.h +132 -132
  91. package/cpp/{chat-template.hpp → minja/chat-template.hpp} +537 -529
  92. package/cpp/{minja.hpp → minja/minja.hpp} +2941 -2883
  93. package/cpp/ops.cpp +8723 -0
  94. package/cpp/ops.h +128 -0
  95. package/cpp/rn-llama.cpp +45 -71
  96. package/cpp/rn-llama.h +3 -3
  97. package/cpp/sampling.cpp +573 -532
  98. package/cpp/sgemm.cpp +3043 -2598
  99. package/cpp/sgemm.h +14 -14
  100. package/cpp/simd-mappings.h +888 -0
  101. package/cpp/speculative.cpp +278 -277
  102. package/cpp/speculative.h +28 -28
  103. package/cpp/unary-ops.cpp +186 -0
  104. package/cpp/unary-ops.h +28 -0
  105. package/cpp/vec.cpp +258 -0
  106. package/cpp/vec.h +802 -0
  107. package/ios/CMakeLists.txt +5 -2
  108. package/ios/RNLlama.mm +2 -2
  109. package/ios/RNLlamaContext.mm +40 -24
  110. package/package.json +1 -1
  111. package/src/NativeRNLlama.ts +6 -4
  112. package/src/index.ts +3 -1
  113. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  114. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  115. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  116. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  117. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  118. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  119. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  120. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  122. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  124. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  125. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  126. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  127. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  128. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  129. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  130. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  131. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  132. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  133. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  134. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  135. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  136. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  194. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  195. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  196. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  197. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  198. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  199. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  200. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  201. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  202. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  203. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  204. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  205. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  206. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  207. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  208. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  209. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  210. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  211. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  212. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  213. package/android/src/main/build-arm64/Makefile +0 -1862
  214. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  215. package/cpp/chat.hpp +0 -55
  216. package/cpp/rn-llama.hpp +0 -913
package/cpp/ggml-cpu.cpp CHANGED
@@ -1,633 +1,655 @@
1
- #include "ggml-backend.h"
2
- #include "ggml-backend-impl.h"
3
- #include "ggml-cpu.h"
4
- #include "ggml-cpu-aarch64.h"
5
- #include "ggml-cpu-traits.h"
6
- #include "ggml-impl.h"
7
-
8
- #include <cctype>
9
- #include <string>
10
- #include <vector>
11
-
12
- #ifdef LM_GGML_USE_CPU_HBM
13
- #include "ggml-cpu-hbm.h"
14
- #endif
15
-
16
- #if defined(__APPLE__)
17
- #include <sys/types.h>
18
- #include <sys/sysctl.h>
19
- #endif
20
-
21
- #if defined(_WIN32)
22
- #define WIN32_LEAN_AND_MEAN
23
- #ifndef NOMINMAX
24
- #define NOMINMAX
25
- #endif
26
- #include <windows.h>
27
- #endif
28
-
29
- // ggml-backend interface
30
-
31
- std::vector<lm_ggml_backend_buffer_type_t>& lm_ggml_backend_cpu_get_extra_buffers_type() {
32
- static std::vector<lm_ggml_backend_buffer_type_t> bufts = []() {
33
- std::vector<lm_ggml_backend_buffer_type_t> bufts;
34
-
35
- #if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
36
- if (lm_ggml_backend_amx_buffer_type()) {
37
- bufts.push_back(lm_ggml_backend_amx_buffer_type());
38
- }
39
- #endif
40
-
41
- #ifdef LM_GGML_USE_CPU_AARCH64
42
- if (lm_ggml_backend_cpu_aarch64_buffer_type()) {
43
- bufts.push_back(lm_ggml_backend_cpu_aarch64_buffer_type());
44
- }
45
- #endif
46
-
47
- bufts.push_back(NULL);
48
-
49
- return bufts;
50
- }();
51
-
52
- return bufts;
53
- }
54
-
55
- static lm_ggml_backend_buffer_type_t * lm_ggml_backend_cpu_device_get_extra_buffers_type(lm_ggml_backend_dev_t device) {
56
- return lm_ggml_backend_cpu_get_extra_buffers_type().data();
57
-
58
- LM_GGML_UNUSED(device);
59
- }
60
-
61
- static bool lm_ggml_backend_cpu_is_extra_buffer_type(lm_ggml_backend_buffer_type_t buft) {
62
- for (auto extra : lm_ggml_backend_cpu_get_extra_buffers_type()) {
63
- if (extra && extra == buft) return true;
64
- }
65
- return false;
66
- }
67
-
68
- // CPU backend - backend (stream)
69
-
70
- struct lm_ggml_backend_cpu_context {
71
- int n_threads;
72
- lm_ggml_threadpool_t threadpool;
73
-
74
- uint8_t * work_data;
75
- size_t work_size;
76
-
77
- lm_ggml_abort_callback abort_callback;
78
- void * abort_callback_data;
79
- };
80
-
81
- static const char * lm_ggml_backend_cpu_get_name(lm_ggml_backend_t backend) {
82
- return "CPU";
83
-
84
- LM_GGML_UNUSED(backend);
85
- }
86
-
87
- static void lm_ggml_backend_cpu_free(lm_ggml_backend_t backend) {
88
- struct lm_ggml_backend_cpu_context * cpu_ctx = (struct lm_ggml_backend_cpu_context *)backend->context;
89
- delete[] cpu_ctx->work_data;
90
- delete cpu_ctx;
91
- delete backend;
92
- }
93
-
94
- struct lm_ggml_backend_plan_cpu {
95
- struct lm_ggml_cplan cplan;
96
- struct lm_ggml_cgraph cgraph;
97
- };
98
-
99
- static lm_ggml_backend_graph_plan_t lm_ggml_backend_cpu_graph_plan_create(lm_ggml_backend_t backend, const struct lm_ggml_cgraph * cgraph) {
100
- struct lm_ggml_backend_cpu_context * cpu_ctx = (struct lm_ggml_backend_cpu_context *)backend->context;
101
-
102
- struct lm_ggml_backend_plan_cpu * cpu_plan = new lm_ggml_backend_plan_cpu;
103
-
104
- cpu_plan->cplan = lm_ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
105
- cpu_plan->cgraph = *cgraph; // FIXME: deep copy
106
-
107
- if (cpu_plan->cplan.work_size > 0) {
108
- cpu_plan->cplan.work_data = new uint8_t[cpu_plan->cplan.work_size];
109
- if (cpu_plan->cplan.work_data == NULL) {
110
- delete cpu_plan;
111
- return NULL;
112
- }
113
- }
114
-
115
- cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
116
- cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;
117
-
118
- return cpu_plan;
119
- }
120
-
121
- static void lm_ggml_backend_cpu_graph_plan_free(lm_ggml_backend_t backend, lm_ggml_backend_graph_plan_t plan) {
122
- struct lm_ggml_backend_plan_cpu * cpu_plan = (struct lm_ggml_backend_plan_cpu *)plan;
123
-
124
- delete[] cpu_plan->cplan.work_data;
125
- delete cpu_plan;
126
-
127
- LM_GGML_UNUSED(backend);
128
- }
129
-
130
- static enum lm_ggml_status lm_ggml_backend_cpu_graph_plan_compute(lm_ggml_backend_t backend, lm_ggml_backend_graph_plan_t plan) {
131
- struct lm_ggml_backend_plan_cpu * cpu_plan = (struct lm_ggml_backend_plan_cpu *)plan;
132
-
133
- return lm_ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
134
-
135
- LM_GGML_UNUSED(backend);
136
- }
137
-
138
- static enum lm_ggml_status lm_ggml_backend_cpu_graph_compute(lm_ggml_backend_t backend, struct lm_ggml_cgraph * cgraph) {
139
- struct lm_ggml_backend_cpu_context * cpu_ctx = (struct lm_ggml_backend_cpu_context *)backend->context;
140
-
141
- struct lm_ggml_cplan cplan = lm_ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
142
-
143
- if (cpu_ctx->work_size < cplan.work_size) {
144
- delete[] cpu_ctx->work_data;
145
- cpu_ctx->work_data = new uint8_t[cplan.work_size];
146
- if (cpu_ctx->work_data == NULL) {
147
- cpu_ctx->work_size = 0;
148
- return LM_GGML_STATUS_ALLOC_FAILED;
149
- }
150
- cpu_ctx->work_size = cplan.work_size;
151
- }
152
- cplan.work_data = (uint8_t *)cpu_ctx->work_data;
153
-
154
- cplan.abort_callback = cpu_ctx->abort_callback;
155
- cplan.abort_callback_data = cpu_ctx->abort_callback_data;
156
-
157
- return lm_ggml_graph_compute(cgraph, &cplan);
158
- }
159
-
160
- static const struct lm_ggml_backend_i lm_ggml_backend_cpu_i = {
161
- /* .get_name = */ lm_ggml_backend_cpu_get_name,
162
- /* .free = */ lm_ggml_backend_cpu_free,
163
- /* .set_tensor_async = */ NULL,
164
- /* .get_tensor_async = */ NULL,
165
- /* .cpy_tensor_async = */ NULL,
166
- /* .synchronize = */ NULL,
167
- /* .graph_plan_create = */ lm_ggml_backend_cpu_graph_plan_create,
168
- /* .graph_plan_free = */ lm_ggml_backend_cpu_graph_plan_free,
169
- /* .graph_plan_update = */ NULL,
170
- /* .graph_plan_compute = */ lm_ggml_backend_cpu_graph_plan_compute,
171
- /* .graph_compute = */ lm_ggml_backend_cpu_graph_compute,
172
- /* .event_record = */ NULL,
173
- /* .event_wait = */ NULL,
174
- };
175
-
176
- static lm_ggml_guid_t lm_ggml_backend_cpu_guid(void) {
177
- static lm_ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
178
- return &guid;
179
- }
180
-
181
- lm_ggml_backend_t lm_ggml_backend_cpu_init(void) {
182
- // initialize CPU backend now to avoid slowing the first graph computation
183
- lm_ggml_cpu_init();
184
-
185
- struct lm_ggml_backend_cpu_context * ctx = new lm_ggml_backend_cpu_context;
186
- if (ctx == NULL) {
187
- return NULL;
188
- }
189
-
190
- ctx->n_threads = LM_GGML_DEFAULT_N_THREADS;
191
- ctx->threadpool = NULL;
192
- ctx->work_data = NULL;
193
- ctx->work_size = 0;
194
- ctx->abort_callback = NULL;
195
- ctx->abort_callback_data = NULL;
196
-
197
- lm_ggml_backend_t cpu_backend = new lm_ggml_backend {
198
- /* .guid = */ lm_ggml_backend_cpu_guid(),
199
- /* .interface = */ lm_ggml_backend_cpu_i,
200
- /* .device = */ lm_ggml_backend_reg_dev_get(lm_ggml_backend_cpu_reg(), 0),
201
- /* .context = */ ctx,
202
- };
203
-
204
- if (cpu_backend == NULL) {
205
- delete ctx;
206
- return NULL;
207
- }
208
-
209
- return cpu_backend;
210
- }
211
-
212
- bool lm_ggml_backend_is_cpu(lm_ggml_backend_t backend) {
213
- return backend != NULL && lm_ggml_guid_matches(backend->guid, lm_ggml_backend_cpu_guid());
214
- }
215
-
216
- void lm_ggml_backend_cpu_set_n_threads(lm_ggml_backend_t backend_cpu, int n_threads) {
217
- LM_GGML_ASSERT(lm_ggml_backend_is_cpu(backend_cpu));
218
-
219
- struct lm_ggml_backend_cpu_context * ctx = (struct lm_ggml_backend_cpu_context *)backend_cpu->context;
220
- ctx->n_threads = n_threads;
221
- }
222
-
223
- void lm_ggml_backend_cpu_set_threadpool(lm_ggml_backend_t backend_cpu, lm_ggml_threadpool_t threadpool) {
224
- LM_GGML_ASSERT(lm_ggml_backend_is_cpu(backend_cpu));
225
-
226
- struct lm_ggml_backend_cpu_context * ctx = (struct lm_ggml_backend_cpu_context *)backend_cpu->context;
227
-
228
- if (ctx->threadpool && ctx->threadpool != threadpool) {
229
- // already had a different threadpool, pause/suspend it before switching
230
- lm_ggml_threadpool_pause(ctx->threadpool);
231
- }
232
- ctx->threadpool = threadpool;
233
- }
234
-
235
- void lm_ggml_backend_cpu_set_abort_callback(lm_ggml_backend_t backend_cpu, lm_ggml_abort_callback abort_callback, void * abort_callback_data) {
236
- LM_GGML_ASSERT(lm_ggml_backend_is_cpu(backend_cpu));
237
-
238
- struct lm_ggml_backend_cpu_context * ctx = (struct lm_ggml_backend_cpu_context *)backend_cpu->context;
239
- ctx->abort_callback = abort_callback;
240
- ctx->abort_callback_data = abort_callback_data;
241
- }
242
-
243
- // CPU backend - device
244
-
245
- struct lm_ggml_backend_cpu_device_context {
246
- std::string description = "CPU";
247
-
248
- lm_ggml_backend_cpu_device_context() {
249
- #ifdef __APPLE__
250
- size_t len = 0;
251
- if (!sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0)) {
252
- description.resize(len);
253
- sysctlbyname("machdep.cpu.brand_string", &description[0], &len, NULL, 0); // NOLINT
254
- }
255
- #elif defined(__linux__)
256
- FILE * f = fopen("/proc/cpuinfo", "r");
257
- if (f) {
258
- char buf[1024];
259
- while (fgets(buf, sizeof(buf), f)) {
260
- if (strncmp(buf, "model name", 10) == 0) {
261
- char * p = strchr(buf, ':');
262
- if (p) {
263
- p++;
264
- while (std::isspace(*p)) {
265
- p++;
266
- }
267
- while (std::isspace(p[strlen(p) - 1])) {
268
- p[strlen(p) - 1] = '\0';
269
- }
270
- description = p;
271
- break;
272
- }
273
- }
274
- }
275
- fclose(f);
276
- }
277
- #elif defined(_WIN32)
278
- HKEY hKey;
279
- if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
280
- TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
281
- 0,
282
- KEY_READ,
283
- &hKey) == ERROR_SUCCESS) {
284
- DWORD cpu_brand_size = 0;
285
- if (RegQueryValueExA(hKey,
286
- "ProcessorNameString",
287
- NULL,
288
- NULL,
289
- NULL,
290
- &cpu_brand_size) == ERROR_SUCCESS) {
291
- description.resize(cpu_brand_size);
292
- if (RegQueryValueExA(hKey,
293
- "ProcessorNameString",
294
- NULL,
295
- NULL,
296
- (LPBYTE)&description[0], // NOLINT
297
- &cpu_brand_size) == ERROR_SUCCESS) {
298
- if (description.find('\0') != std::string::npos) {
299
- description.resize(description.find('\0'));
300
- }
301
- }
302
- }
303
- RegCloseKey(hKey);
304
- }
305
- #endif
306
- }
307
- };
308
-
309
- static const char * lm_ggml_backend_cpu_device_get_name(lm_ggml_backend_dev_t dev) {
310
- return "CPU";
311
-
312
- LM_GGML_UNUSED(dev);
313
- }
314
-
315
- static const char * lm_ggml_backend_cpu_device_get_description(lm_ggml_backend_dev_t dev) {
316
- struct lm_ggml_backend_cpu_device_context * ctx = (struct lm_ggml_backend_cpu_device_context *)dev->context;
317
-
318
- return ctx->description.c_str();
319
- }
320
-
321
- static void lm_ggml_backend_cpu_device_get_memory(lm_ggml_backend_dev_t dev, size_t * free, size_t * total) {
322
- // TODO
323
- *free = 0;
324
- *total = 0;
325
-
326
- LM_GGML_UNUSED(dev);
327
- }
328
-
329
- static enum lm_ggml_backend_dev_type lm_ggml_backend_cpu_device_get_type(lm_ggml_backend_dev_t dev) {
330
- return LM_GGML_BACKEND_DEVICE_TYPE_CPU;
331
-
332
- LM_GGML_UNUSED(dev);
333
- }
334
-
335
- static void lm_ggml_backend_cpu_device_get_props(lm_ggml_backend_dev_t dev, struct lm_ggml_backend_dev_props * props) {
336
- props->name = lm_ggml_backend_cpu_device_get_name(dev);
337
- props->description = lm_ggml_backend_cpu_device_get_description(dev);
338
- props->type = lm_ggml_backend_cpu_device_get_type(dev);
339
- lm_ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
340
- props->caps = {
341
- /* .async = */ false,
342
- /* .host_buffer = */ false,
343
- /* .buffer_from_host_ptr = */ true,
344
- /* .events = */ false,
345
- };
346
- }
347
-
348
- static lm_ggml_backend_t lm_ggml_backend_cpu_device_init_backend(lm_ggml_backend_dev_t dev, const char * params) {
349
- return lm_ggml_backend_cpu_init();
350
-
351
- LM_GGML_UNUSED(dev);
352
- LM_GGML_UNUSED(params);
353
- }
354
-
355
- static lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_device_get_buffer_type(lm_ggml_backend_dev_t dev) {
356
- return lm_ggml_backend_cpu_buffer_type();
357
-
358
- LM_GGML_UNUSED(dev);
359
- }
360
-
361
- static lm_ggml_backend_buffer_t lm_ggml_backend_cpu_device_buffer_from_host_ptr(lm_ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
362
- return lm_ggml_backend_cpu_buffer_from_ptr(ptr, size);
363
-
364
- LM_GGML_UNUSED(dev);
365
- LM_GGML_UNUSED(max_tensor_size);
366
- }
367
-
368
- static bool lm_ggml_backend_cpu_device_supports_op(lm_ggml_backend_dev_t dev, const struct lm_ggml_tensor * op) {
369
- const struct lm_ggml_tensor * src0 = op->src[0];
370
- const struct lm_ggml_tensor * src1 = op->src[1];
371
-
372
- if (op->op == LM_GGML_OP_NONE || op->op == LM_GGML_OP_RESHAPE || op->op == LM_GGML_OP_VIEW || op->op == LM_GGML_OP_PERMUTE || op->op == LM_GGML_OP_TRANSPOSE) {
373
- return true;
374
- }
375
-
376
- // extra_buffer_op?
377
- for (auto extra : lm_ggml_backend_cpu_get_extra_buffers_type()) {
378
- if (extra) {
379
- auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
380
- if (buf_extra && buf_extra->supports_op(dev, op)) {
381
- return true;
382
- }
383
- }
384
- }
385
-
386
- // the other case need host buffer.
387
- for (int i = 0; i < LM_GGML_MAX_SRC; i++) {
388
- if (op->src[i] && op->src[i]->buffer && !lm_ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
389
- return false;
390
- }
391
- }
392
-
393
- switch (op->op) {
394
- case LM_GGML_OP_CPY:
395
- return
396
- op->type != LM_GGML_TYPE_IQ3_XXS &&
397
- op->type != LM_GGML_TYPE_IQ3_S &&
398
- op->type != LM_GGML_TYPE_IQ2_XXS &&
399
- op->type != LM_GGML_TYPE_IQ2_XS &&
400
- op->type != LM_GGML_TYPE_IQ2_S &&
401
- op->type != LM_GGML_TYPE_IQ1_S &&
402
- op->type != LM_GGML_TYPE_IQ1_M; // missing type_traits.from_float
403
- case LM_GGML_OP_MUL_MAT:
404
- return src1->type == LM_GGML_TYPE_F32 || src1->type == lm_ggml_get_type_traits_cpu(src0->type)->vec_dot_type;
405
- case LM_GGML_OP_SOFT_MAX_BACK: {
406
- if (op->src[0]->type != LM_GGML_TYPE_F32 || op->src[1]->type != LM_GGML_TYPE_F32) {
407
- return false;
408
- }
409
- float max_bias = 0.0f;
410
-
411
- memcpy(&max_bias, (const float *) op->op_params + 1, sizeof(float));
412
-
413
- return max_bias == 0.0f;
414
- }
415
- case LM_GGML_OP_IM2COL_BACK:
416
- return src0->type == LM_GGML_TYPE_F32 && src1->type == LM_GGML_TYPE_F32;
417
- case LM_GGML_OP_OUT_PROD:
418
- return (src0->type == LM_GGML_TYPE_F32 || (lm_ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
419
- src1->type == LM_GGML_TYPE_F32 && op->type == LM_GGML_TYPE_F32;
420
- default:
421
- return true;
422
- }
423
- }
424
-
425
- static bool lm_ggml_backend_cpu_device_supports_buft(lm_ggml_backend_dev_t dev, lm_ggml_backend_buffer_type_t buft) {
426
- return lm_ggml_backend_buft_is_host(buft) || lm_ggml_backend_cpu_is_extra_buffer_type(buft);
427
- LM_GGML_UNUSED(dev);
428
- }
429
-
430
- static const struct lm_ggml_backend_device_i lm_ggml_backend_cpu_device_i = {
431
- /* .get_name = */ lm_ggml_backend_cpu_device_get_name,
432
- /* .get_description = */ lm_ggml_backend_cpu_device_get_description,
433
- /* .get_memory = */ lm_ggml_backend_cpu_device_get_memory,
434
- /* .get_type = */ lm_ggml_backend_cpu_device_get_type,
435
- /* .get_props = */ lm_ggml_backend_cpu_device_get_props,
436
- /* .init_backend = */ lm_ggml_backend_cpu_device_init_backend,
437
- /* .get_buffer_type = */ lm_ggml_backend_cpu_device_get_buffer_type,
438
- /* .get_host_buffer_type = */ NULL,
439
- /* .buffer_from_host_ptr = */ lm_ggml_backend_cpu_device_buffer_from_host_ptr,
440
- /* .supports_op = */ lm_ggml_backend_cpu_device_supports_op,
441
- /* .supports_buft = */ lm_ggml_backend_cpu_device_supports_buft,
442
- /* .offload_op = */ NULL,
443
- /* .event_new = */ NULL,
444
- /* .event_free = */ NULL,
445
- /* .event_synchronize = */ NULL,
446
- };
447
-
448
- // CPU backend - backend (reg)
449
-
450
- static const char * lm_ggml_backend_cpu_reg_get_name(lm_ggml_backend_reg_t reg) {
451
- return "CPU";
452
-
453
- LM_GGML_UNUSED(reg);
454
- }
455
-
456
- static size_t lm_ggml_backend_cpu_reg_get_device_count(lm_ggml_backend_reg_t reg) {
457
- return 1;
458
-
459
- LM_GGML_UNUSED(reg);
460
- }
461
-
462
- static lm_ggml_backend_dev_t lm_ggml_backend_cpu_reg_get_device(lm_ggml_backend_reg_t reg, size_t index) {
463
- LM_GGML_ASSERT(index == 0);
464
-
465
- static lm_ggml_backend_cpu_device_context ctx;
466
- static lm_ggml_backend_device lm_ggml_backend_cpu_device = {
467
- /* .iface = */ lm_ggml_backend_cpu_device_i,
468
- /* .reg = */ reg,
469
- /* .context = */ &ctx,
470
- };
471
-
472
- return &lm_ggml_backend_cpu_device;
473
- }
474
-
475
- // This is intended to replace the the lm_ggml_cpu_has_* functions when loading the CPU backend dynamically,
476
- // and additionally to allow other backends to expose their own list of features that applications can query using the same API
477
- static lm_ggml_backend_feature * lm_ggml_backend_cpu_get_features(lm_ggml_backend_reg_t reg) {
478
- static std::vector<lm_ggml_backend_feature> features = []() {
479
- lm_ggml_cpu_init();
480
-
481
- std::vector<lm_ggml_backend_feature> features;
482
- if (lm_ggml_cpu_has_sse3()) {
483
- features.push_back({ "SSE3", "1" });
484
- }
485
- if (lm_ggml_cpu_has_ssse3()) {
486
- features.push_back({ "SSSE3", "1" });
487
- }
488
- if (lm_ggml_cpu_has_avx()) {
489
- features.push_back({ "AVX", "1" });
490
- }
491
- if (lm_ggml_cpu_has_avx_vnni()) {
492
- features.push_back({ "AVX_VNNI", "1" });
493
- }
494
- if (lm_ggml_cpu_has_avx2()) {
495
- features.push_back({ "AVX2", "1" });
496
- }
497
- if (lm_ggml_cpu_has_f16c()) {
498
- features.push_back({ "F16C", "1" });
499
- }
500
- if (lm_ggml_cpu_has_fma()) {
501
- features.push_back({ "FMA", "1" });
502
- }
503
- if (lm_ggml_cpu_has_avx512()) {
504
- features.push_back({ "AVX512", "1" });
505
- }
506
- if (lm_ggml_cpu_has_avx512_vbmi()) {
507
- features.push_back({ "AVX512_VBMI", "1" });
508
- }
509
- if (lm_ggml_cpu_has_avx512_vnni()) {
510
- features.push_back({ "AVX512_VNNI", "1" });
511
- }
512
- if (lm_ggml_cpu_has_avx512_bf16()) {
513
- features.push_back({ "AVX512_BF16", "1" });
514
- }
515
- if (lm_ggml_cpu_has_amx_int8()) {
516
- features.push_back({ "AMX_INT8", "1" });
517
- }
518
- if (lm_ggml_cpu_has_neon()) {
519
- features.push_back({ "NEON", "1" });
520
- }
521
- if (lm_ggml_cpu_has_arm_fma()) {
522
- features.push_back({ "ARM_FMA", "1" });
523
- }
524
- if (lm_ggml_cpu_has_fp16_va()) {
525
- features.push_back({ "FP16_VA", "1" });
526
- }
527
- if (lm_ggml_cpu_has_matmul_int8()) {
528
- features.push_back({ "MATMUL_INT8", "1" });
529
- }
530
- if (lm_ggml_cpu_has_sve()) {
531
- features.push_back({ "SVE", "1" });
532
- }
533
- if (lm_ggml_cpu_has_dotprod()) {
534
- features.push_back({ "DOTPROD", "1" });
535
- }
536
- if (lm_ggml_cpu_get_sve_cnt() > 0) {
537
- static std::string sve_cnt = std::to_string(lm_ggml_cpu_get_sve_cnt());
538
- features.push_back({ "SVE_CNT", sve_cnt.c_str() });
539
- }
540
- if (lm_ggml_cpu_has_riscv_v()) {
541
- features.push_back({ "RISCV_V", "1" });
542
- }
543
- if (lm_ggml_cpu_has_vsx()) {
544
- features.push_back({ "VSX", "1" });
545
- }
546
- if (lm_ggml_cpu_has_wasm_simd()) {
547
- features.push_back({ "WASM_SIMD", "1" });
548
- }
549
- if (lm_ggml_cpu_has_llamafile()) {
550
- features.push_back({ "LLAMAFILE", "1" });
551
- }
552
- #ifdef LM_GGML_USE_ACCELERATE
553
- features.push_back({ "ACCELERATE", "1" });
554
- #endif
555
- #ifdef LM_GGML_USE_CPU_HBM
556
- features.push_back({ "CPU_HBM", "1" });
557
- #endif
558
- #ifdef LM_GGML_USE_OPENMP
559
- features.push_back({ "OPENMP", "1" });
560
- #endif
561
- #ifdef LM_GGML_USE_CPU_AARCH64
562
- features.push_back({ "AARCH64_REPACK", "1" });
563
- #endif
564
-
565
- features.push_back({ nullptr, nullptr });
566
-
567
- return features;
568
- }();
569
-
570
- return features.data();
571
-
572
- LM_GGML_UNUSED(reg);
573
- }
574
-
575
- static void * lm_ggml_backend_cpu_get_proc_address(lm_ggml_backend_reg_t reg, const char * name) {
576
- if (strcmp(name, "lm_ggml_backend_set_n_threads") == 0) {
577
- lm_ggml_backend_set_n_threads_t fct = lm_ggml_backend_cpu_set_n_threads;
578
- return (void *)fct;
579
- }
580
- if (strcmp(name, "lm_ggml_backend_dev_get_extra_bufts") == 0) {
581
- lm_ggml_backend_dev_get_extra_bufts_t fct = lm_ggml_backend_cpu_device_get_extra_buffers_type;
582
- return (void *)fct;
583
- }
584
- if (strcmp(name, "lm_ggml_backend_get_features") == 0) {
585
- return (void *)lm_ggml_backend_cpu_get_features;
586
- }
587
- if (strcmp(name, "lm_ggml_backend_set_abort_callback") == 0) {
588
- return (void *)lm_ggml_backend_cpu_set_abort_callback;
589
- }
590
- if (strcmp(name, "lm_ggml_backend_cpu_numa_init") == 0) {
591
- return (void *)lm_ggml_numa_init;
592
- }
593
- if (strcmp(name, "lm_ggml_backend_cpu_is_numa") == 0) {
594
- return (void *)lm_ggml_is_numa;
595
- }
596
-
597
- // threadpool - TODO: move to ggml-base
598
- if (strcmp(name, "lm_ggml_threadpool_new") == 0) {
599
- return (void *)lm_ggml_threadpool_new;
600
- }
601
- if (strcmp(name, "lm_ggml_threadpool_free") == 0) {
602
- return (void *)lm_ggml_threadpool_free;
603
- }
604
- if (strcmp(name, "lm_ggml_backend_cpu_set_threadpool") == 0) {
605
- return (void *)lm_ggml_backend_cpu_set_threadpool;
606
- }
607
-
608
- return NULL;
609
-
610
- LM_GGML_UNUSED(reg);
611
- }
612
-
613
- static const struct lm_ggml_backend_reg_i lm_ggml_backend_cpu_reg_i = {
614
- /* .get_name = */ lm_ggml_backend_cpu_reg_get_name,
615
- /* .get_device_count = */ lm_ggml_backend_cpu_reg_get_device_count,
616
- /* .get_device = */ lm_ggml_backend_cpu_reg_get_device,
617
- /* .get_proc_address = */ lm_ggml_backend_cpu_get_proc_address,
618
- };
619
-
620
- lm_ggml_backend_reg_t lm_ggml_backend_cpu_reg(void) {
621
- // init CPU feature detection
622
- lm_ggml_cpu_init();
623
-
624
- static struct lm_ggml_backend_reg lm_ggml_backend_cpu_reg = {
625
- /* .api_version = */ LM_GGML_BACKEND_API_VERSION,
626
- /* .iface = */ lm_ggml_backend_cpu_reg_i,
627
- /* .context = */ NULL,
628
- };
629
-
630
- return &lm_ggml_backend_cpu_reg;
631
- }
632
-
633
- LM_GGML_BACKEND_DL_IMPL(lm_ggml_backend_cpu_reg)
1
#include "ggml-backend.h"
#include "ggml-backend-impl.h"
#include "ggml-cpu.h"
#include "ggml-cpu-aarch64.h"
#include "ggml-cpu-traits.h"
#include "ggml-impl.h"

#include <cctype>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <new>
#include <string>
#include <vector>

#ifdef LM_GGML_USE_CPU_HBM
#include "ggml-cpu-hbm.h"
#endif

#ifdef LM_GGML_USE_CPU_KLEIDIAI
#include "kleidiai/kleidiai.h"
#endif

#if defined(__APPLE__)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif

#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif
32
+
33
+ // ggml-backend interface
34
+
35
+ std::vector<lm_ggml_backend_buffer_type_t>& lm_ggml_backend_cpu_get_extra_buffers_type() {
36
+ static std::vector<lm_ggml_backend_buffer_type_t> bufts = []() {
37
+ std::vector<lm_ggml_backend_buffer_type_t> bufts;
38
+
39
+ #if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
40
+ if (lm_ggml_backend_amx_buffer_type()) {
41
+ bufts.push_back(lm_ggml_backend_amx_buffer_type());
42
+ }
43
+ #endif
44
+
45
+ #ifdef LM_GGML_USE_CPU_KLEIDIAI
46
+ if (lm_ggml_backend_cpu_kleidiai_buffer_type()) {
47
+ bufts.push_back(lm_ggml_backend_cpu_kleidiai_buffer_type());
48
+ }
49
+ #endif
50
+
51
+ #ifdef LM_GGML_USE_CPU_AARCH64
52
+ if (lm_ggml_backend_cpu_aarch64_buffer_type()) {
53
+ bufts.push_back(lm_ggml_backend_cpu_aarch64_buffer_type());
54
+ }
55
+ #endif
56
+
57
+ bufts.push_back(NULL);
58
+
59
+ return bufts;
60
+ }();
61
+
62
+ return bufts;
63
+ }
64
+
65
+ static lm_ggml_backend_buffer_type_t * lm_ggml_backend_cpu_device_get_extra_buffers_type(lm_ggml_backend_dev_t device) {
66
+ return lm_ggml_backend_cpu_get_extra_buffers_type().data();
67
+
68
+ LM_GGML_UNUSED(device);
69
+ }
70
+
71
+ static bool lm_ggml_backend_cpu_is_extra_buffer_type(lm_ggml_backend_buffer_type_t buft) {
72
+ for (auto extra : lm_ggml_backend_cpu_get_extra_buffers_type()) {
73
+ if (extra && extra == buft) return true;
74
+ }
75
+ return false;
76
+ }
77
+
78
+ // CPU backend - backend (stream)
79
+
80
// Per-backend-instance (stream) state for the CPU backend.
struct lm_ggml_backend_cpu_context {
    int                  n_threads;  // number of threads used when planning/computing a graph
    lm_ggml_threadpool_t threadpool; // optional external threadpool; NULL -> backend-managed threads

    uint8_t * work_data; // scratch buffer reused across graph computations; grown on demand
    size_t    work_size; // current capacity of work_data in bytes

    lm_ggml_abort_callback abort_callback;      // copied into each cplan so compute can be cancelled
    void *                 abort_callback_data; // user pointer forwarded to abort_callback
};
90
+
91
+ static const char * lm_ggml_backend_cpu_get_name(lm_ggml_backend_t backend) {
92
+ return "CPU";
93
+
94
+ LM_GGML_UNUSED(backend);
95
+ }
96
+
97
+ static void lm_ggml_backend_cpu_free(lm_ggml_backend_t backend) {
98
+ struct lm_ggml_backend_cpu_context * cpu_ctx = (struct lm_ggml_backend_cpu_context *)backend->context;
99
+ delete[] cpu_ctx->work_data;
100
+ delete cpu_ctx;
101
+ delete backend;
102
+ }
103
+
104
// A prepared (planned) computation: the compute plan plus a copy of the graph.
struct lm_ggml_backend_plan_cpu {
    struct lm_ggml_cplan  cplan;  // work buffer + threading parameters for the graph
    struct lm_ggml_cgraph cgraph; // shallow copy of the caller's graph (see FIXME in plan_create)
};
108
+
109
+ static lm_ggml_backend_graph_plan_t lm_ggml_backend_cpu_graph_plan_create(lm_ggml_backend_t backend, const struct lm_ggml_cgraph * cgraph) {
110
+ struct lm_ggml_backend_cpu_context * cpu_ctx = (struct lm_ggml_backend_cpu_context *)backend->context;
111
+
112
+ struct lm_ggml_backend_plan_cpu * cpu_plan = new lm_ggml_backend_plan_cpu;
113
+
114
+ cpu_plan->cplan = lm_ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
115
+ cpu_plan->cgraph = *cgraph; // FIXME: deep copy
116
+
117
+ if (cpu_plan->cplan.work_size > 0) {
118
+ cpu_plan->cplan.work_data = new uint8_t[cpu_plan->cplan.work_size];
119
+ if (cpu_plan->cplan.work_data == NULL) {
120
+ delete cpu_plan;
121
+ return NULL;
122
+ }
123
+ }
124
+
125
+ cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
126
+ cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;
127
+
128
+ return cpu_plan;
129
+ }
130
+
131
+ static void lm_ggml_backend_cpu_graph_plan_free(lm_ggml_backend_t backend, lm_ggml_backend_graph_plan_t plan) {
132
+ struct lm_ggml_backend_plan_cpu * cpu_plan = (struct lm_ggml_backend_plan_cpu *)plan;
133
+
134
+ delete[] cpu_plan->cplan.work_data;
135
+ delete cpu_plan;
136
+
137
+ LM_GGML_UNUSED(backend);
138
+ }
139
+
140
+ static enum lm_ggml_status lm_ggml_backend_cpu_graph_plan_compute(lm_ggml_backend_t backend, lm_ggml_backend_graph_plan_t plan) {
141
+ struct lm_ggml_backend_plan_cpu * cpu_plan = (struct lm_ggml_backend_plan_cpu *)plan;
142
+
143
+ return lm_ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
144
+
145
+ LM_GGML_UNUSED(backend);
146
+ }
147
+
148
+ static enum lm_ggml_status lm_ggml_backend_cpu_graph_compute(lm_ggml_backend_t backend, struct lm_ggml_cgraph * cgraph) {
149
+ struct lm_ggml_backend_cpu_context * cpu_ctx = (struct lm_ggml_backend_cpu_context *)backend->context;
150
+
151
+ struct lm_ggml_cplan cplan = lm_ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
152
+
153
+ if (cpu_ctx->work_size < cplan.work_size) {
154
+ delete[] cpu_ctx->work_data;
155
+ cpu_ctx->work_data = new uint8_t[cplan.work_size];
156
+ if (cpu_ctx->work_data == NULL) {
157
+ cpu_ctx->work_size = 0;
158
+ return LM_GGML_STATUS_ALLOC_FAILED;
159
+ }
160
+ cpu_ctx->work_size = cplan.work_size;
161
+ }
162
+ cplan.work_data = (uint8_t *)cpu_ctx->work_data;
163
+
164
+ cplan.abort_callback = cpu_ctx->abort_callback;
165
+ cplan.abort_callback_data = cpu_ctx->abort_callback_data;
166
+
167
+ return lm_ggml_graph_compute(cgraph, &cplan);
168
+ }
169
+
170
// vtable for the CPU backend stream: fully synchronous, so the async
// tensor-copy and event hooks are left NULL.
static const struct lm_ggml_backend_i lm_ggml_backend_cpu_i = {
    /* .get_name                = */ lm_ggml_backend_cpu_get_name,
    /* .free                    = */ lm_ggml_backend_cpu_free,
    /* .set_tensor_async        = */ NULL,
    /* .get_tensor_async        = */ NULL,
    /* .cpy_tensor_async        = */ NULL,
    /* .synchronize             = */ NULL,
    /* .graph_plan_create       = */ lm_ggml_backend_cpu_graph_plan_create,
    /* .graph_plan_free         = */ lm_ggml_backend_cpu_graph_plan_free,
    /* .graph_plan_update       = */ NULL,
    /* .graph_plan_compute      = */ lm_ggml_backend_cpu_graph_plan_compute,
    /* .graph_compute           = */ lm_ggml_backend_cpu_graph_compute,
    /* .event_record            = */ NULL,
    /* .event_wait              = */ NULL,
};
185
+
186
// Stable 16-byte identifier used by lm_ggml_backend_is_cpu to recognize
// CPU backend instances.
static lm_ggml_guid_t lm_ggml_backend_cpu_guid(void) {
    static lm_ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
    return &guid;
}
190
+
191
+ lm_ggml_backend_t lm_ggml_backend_cpu_init(void) {
192
+ // initialize CPU backend now to avoid slowing the first graph computation
193
+ lm_ggml_cpu_init();
194
+
195
+ struct lm_ggml_backend_cpu_context * ctx = new lm_ggml_backend_cpu_context;
196
+ if (ctx == NULL) {
197
+ return NULL;
198
+ }
199
+
200
+ ctx->n_threads = LM_GGML_DEFAULT_N_THREADS;
201
+ ctx->threadpool = NULL;
202
+ ctx->work_data = NULL;
203
+ ctx->work_size = 0;
204
+ ctx->abort_callback = NULL;
205
+ ctx->abort_callback_data = NULL;
206
+
207
+ lm_ggml_backend_t cpu_backend = new lm_ggml_backend {
208
+ /* .guid = */ lm_ggml_backend_cpu_guid(),
209
+ /* .interface = */ lm_ggml_backend_cpu_i,
210
+ /* .device = */ lm_ggml_backend_reg_dev_get(lm_ggml_backend_cpu_reg(), 0),
211
+ /* .context = */ ctx,
212
+ };
213
+
214
+ if (cpu_backend == NULL) {
215
+ delete ctx;
216
+ return NULL;
217
+ }
218
+
219
+ return cpu_backend;
220
+ }
221
+
222
+ bool lm_ggml_backend_is_cpu(lm_ggml_backend_t backend) {
223
+ return backend != NULL && lm_ggml_guid_matches(backend->guid, lm_ggml_backend_cpu_guid());
224
+ }
225
+
226
+ void lm_ggml_backend_cpu_set_n_threads(lm_ggml_backend_t backend_cpu, int n_threads) {
227
+ LM_GGML_ASSERT(lm_ggml_backend_is_cpu(backend_cpu));
228
+
229
+ struct lm_ggml_backend_cpu_context * ctx = (struct lm_ggml_backend_cpu_context *)backend_cpu->context;
230
+ ctx->n_threads = n_threads;
231
+ }
232
+
233
+ void lm_ggml_backend_cpu_set_threadpool(lm_ggml_backend_t backend_cpu, lm_ggml_threadpool_t threadpool) {
234
+ LM_GGML_ASSERT(lm_ggml_backend_is_cpu(backend_cpu));
235
+
236
+ struct lm_ggml_backend_cpu_context * ctx = (struct lm_ggml_backend_cpu_context *)backend_cpu->context;
237
+
238
+ if (ctx->threadpool && ctx->threadpool != threadpool) {
239
+ // already had a different threadpool, pause/suspend it before switching
240
+ lm_ggml_threadpool_pause(ctx->threadpool);
241
+ }
242
+ ctx->threadpool = threadpool;
243
+ }
244
+
245
+ void lm_ggml_backend_cpu_set_abort_callback(lm_ggml_backend_t backend_cpu, lm_ggml_abort_callback abort_callback, void * abort_callback_data) {
246
+ LM_GGML_ASSERT(lm_ggml_backend_is_cpu(backend_cpu));
247
+
248
+ struct lm_ggml_backend_cpu_context * ctx = (struct lm_ggml_backend_cpu_context *)backend_cpu->context;
249
+ ctx->abort_callback = abort_callback;
250
+ ctx->abort_callback_data = abort_callback_data;
251
+ }
252
+
253
+ // CPU backend - device
254
+
255
// Per-device state: a human-readable CPU brand string, detected once at
// construction via the platform-appropriate API (sysctl / /proc/cpuinfo /
// Windows registry). Falls back to "CPU" when detection fails.
struct lm_ggml_backend_cpu_device_context {
    std::string description = "CPU";

    lm_ggml_backend_cpu_device_context() {
#ifdef __APPLE__
        size_t len = 0;
        if (!sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0)) {
            description.resize(len);
            sysctlbyname("machdep.cpu.brand_string", &description[0], &len, NULL, 0); // NOLINT
        }
#elif defined(__linux__)
        FILE * f = fopen("/proc/cpuinfo", "r");
        if (f) {
            char buf[1024];
            while (fgets(buf, sizeof(buf), f)) {
                if (strncmp(buf, "model name", 10) == 0) {
                    char * p = strchr(buf, ':');
                    if (p) {
                        p++;
                        // cast to unsigned char: passing a negative `char` to
                        // isspace is undefined behavior
                        while (std::isspace((unsigned char) *p)) {
                            p++;
                        }
                        // trim trailing whitespace; the n > 0 guard prevents
                        // stepping backwards past the start of the value when
                        // it is empty or all whitespace
                        size_t n = strlen(p);
                        while (n > 0 && std::isspace((unsigned char) p[n - 1])) {
                            p[--n] = '\0';
                        }
                        description = p;
                        break;
                    }
                }
            }
            fclose(f);
        }
#elif defined(_WIN32)
        HKEY hKey;
        if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
                        TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
                        0,
                        KEY_READ,
                        &hKey) == ERROR_SUCCESS) {
            DWORD cpu_brand_size = 0;
            if (RegQueryValueExA(hKey,
                                "ProcessorNameString",
                                NULL,
                                NULL,
                                NULL,
                                &cpu_brand_size) == ERROR_SUCCESS) {
                description.resize(cpu_brand_size);
                if (RegQueryValueExA(hKey,
                                    "ProcessorNameString",
                                    NULL,
                                    NULL,
                                    (LPBYTE)&description[0], // NOLINT
                                    &cpu_brand_size) == ERROR_SUCCESS) {
                    // the registry value includes a trailing NUL; drop it
                    if (description.find('\0') != std::string::npos) {
                        description.resize(description.find('\0'));
                    }
                }
            }
            RegCloseKey(hKey);
        }
#endif
    }
};
318
+
319
+ static const char * lm_ggml_backend_cpu_device_get_name(lm_ggml_backend_dev_t dev) {
320
+ return "CPU";
321
+
322
+ LM_GGML_UNUSED(dev);
323
+ }
324
+
325
+ static const char * lm_ggml_backend_cpu_device_get_description(lm_ggml_backend_dev_t dev) {
326
+ struct lm_ggml_backend_cpu_device_context * ctx = (struct lm_ggml_backend_cpu_device_context *)dev->context;
327
+
328
+ return ctx->description.c_str();
329
+ }
330
+
331
+ static void lm_ggml_backend_cpu_device_get_memory(lm_ggml_backend_dev_t dev, size_t * free, size_t * total) {
332
+ // TODO
333
+ *free = 0;
334
+ *total = 0;
335
+
336
+ LM_GGML_UNUSED(dev);
337
+ }
338
+
339
+ static enum lm_ggml_backend_dev_type lm_ggml_backend_cpu_device_get_type(lm_ggml_backend_dev_t dev) {
340
+ return LM_GGML_BACKEND_DEVICE_TYPE_CPU;
341
+
342
+ LM_GGML_UNUSED(dev);
343
+ }
344
+
345
+ static void lm_ggml_backend_cpu_device_get_props(lm_ggml_backend_dev_t dev, struct lm_ggml_backend_dev_props * props) {
346
+ props->name = lm_ggml_backend_cpu_device_get_name(dev);
347
+ props->description = lm_ggml_backend_cpu_device_get_description(dev);
348
+ props->type = lm_ggml_backend_cpu_device_get_type(dev);
349
+ lm_ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
350
+ props->caps = {
351
+ /* .async = */ false,
352
+ /* .host_buffer = */ false,
353
+ /* .buffer_from_host_ptr = */ true,
354
+ /* .events = */ false,
355
+ };
356
+ }
357
+
358
+ static lm_ggml_backend_t lm_ggml_backend_cpu_device_init_backend(lm_ggml_backend_dev_t dev, const char * params) {
359
+ return lm_ggml_backend_cpu_init();
360
+
361
+ LM_GGML_UNUSED(dev);
362
+ LM_GGML_UNUSED(params);
363
+ }
364
+
365
+ static lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_device_get_buffer_type(lm_ggml_backend_dev_t dev) {
366
+ return lm_ggml_backend_cpu_buffer_type();
367
+
368
+ LM_GGML_UNUSED(dev);
369
+ }
370
+
371
+ static lm_ggml_backend_buffer_t lm_ggml_backend_cpu_device_buffer_from_host_ptr(lm_ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
372
+ return lm_ggml_backend_cpu_buffer_from_ptr(ptr, size);
373
+
374
+ LM_GGML_UNUSED(dev);
375
+ LM_GGML_UNUSED(max_tensor_size);
376
+ }
377
+
378
// Reports whether the CPU backend can execute `op`.
// Order of checks matters: no-op/layout ops are always supported; extra
// buffer types (AMX / KleidiAI / repack) get first refusal; then all source
// tensors must live in host-accessible buffers; finally a per-op type matrix
// rules out the combinations the CPU kernels do not implement.
static bool lm_ggml_backend_cpu_device_supports_op(lm_ggml_backend_dev_t dev, const struct lm_ggml_tensor * op) {
    const struct lm_ggml_tensor * src0 = op->src[0];
    const struct lm_ggml_tensor * src1 = op->src[1];

    // pure metadata / layout ops never touch data
    if (op->op == LM_GGML_OP_NONE || op->op == LM_GGML_OP_RESHAPE || op->op == LM_GGML_OP_VIEW || op->op == LM_GGML_OP_PERMUTE || op->op == LM_GGML_OP_TRANSPOSE) {
        return true;
    }

    // does one of the extra buffer types implement this op?
    for (auto extra : lm_ggml_backend_cpu_get_extra_buffers_type()) {
        if (extra) {
            auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
            if (buf_extra && buf_extra->supports_op(dev, op)) {
                return true;
            }
        }
    }

    // all other cases require every source to be in a host buffer
    for (int i = 0; i < LM_GGML_MAX_SRC; i++) {
        if (op->src[i] && op->src[i]->buffer && !lm_ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
            return false;
        }
    }

    switch (op->op) {
        case LM_GGML_OP_CPY:
            return
                op->type != LM_GGML_TYPE_IQ3_XXS &&
                op->type != LM_GGML_TYPE_IQ3_S   &&
                op->type != LM_GGML_TYPE_IQ2_XXS &&
                op->type != LM_GGML_TYPE_IQ2_XS  &&
                op->type != LM_GGML_TYPE_IQ2_S   &&
                op->type != LM_GGML_TYPE_IQ1_S   &&
                op->type != LM_GGML_TYPE_IQ1_M; // missing type_traits.from_float
        case LM_GGML_OP_MUL_MAT:
            // src1 must be F32 or already converted to src0's vec_dot type
            return src1->type == LM_GGML_TYPE_F32 || src1->type == lm_ggml_get_type_traits_cpu(src0->type)->vec_dot_type;
        case LM_GGML_OP_SOFT_MAX_BACK: {
            if (op->src[0]->type != LM_GGML_TYPE_F32 || op->src[1]->type != LM_GGML_TYPE_F32) {
                return false;
            }
            float max_bias = 0.0f;

            // op_params[1] holds max_bias (ALiBi); only the zero-bias case is implemented
            memcpy(&max_bias, (const float *) op->op_params + 1, sizeof(float));

            return max_bias == 0.0f;
        }
        case LM_GGML_OP_IM2COL_BACK:
            return src0->type == LM_GGML_TYPE_F32 && src1->type == LM_GGML_TYPE_F32;
        case LM_GGML_OP_OUT_PROD:
            // quantized src0 additionally requires matching batch dimensions
            return (src0->type == LM_GGML_TYPE_F32 || (lm_ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
                src1->type == LM_GGML_TYPE_F32 && op->type == LM_GGML_TYPE_F32;
        default:
            return true;
    }
}
434
+
435
+ static bool lm_ggml_backend_cpu_device_supports_buft(lm_ggml_backend_dev_t dev, lm_ggml_backend_buffer_type_t buft) {
436
+ return lm_ggml_backend_buft_is_host(buft) || lm_ggml_backend_cpu_is_extra_buffer_type(buft);
437
+ LM_GGML_UNUSED(dev);
438
+ }
439
+
440
// vtable for the CPU device: no pinned host buffers, no offloading and no
// events, so those hooks are left NULL.
static const struct lm_ggml_backend_device_i lm_ggml_backend_cpu_device_i = {
    /* .get_name             = */ lm_ggml_backend_cpu_device_get_name,
    /* .get_description      = */ lm_ggml_backend_cpu_device_get_description,
    /* .get_memory           = */ lm_ggml_backend_cpu_device_get_memory,
    /* .get_type             = */ lm_ggml_backend_cpu_device_get_type,
    /* .get_props            = */ lm_ggml_backend_cpu_device_get_props,
    /* .init_backend         = */ lm_ggml_backend_cpu_device_init_backend,
    /* .get_buffer_type      = */ lm_ggml_backend_cpu_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ lm_ggml_backend_cpu_device_buffer_from_host_ptr,
    /* .supports_op          = */ lm_ggml_backend_cpu_device_supports_op,
    /* .supports_buft        = */ lm_ggml_backend_cpu_device_supports_buft,
    /* .offload_op           = */ NULL,
    /* .event_new            = */ NULL,
    /* .event_free           = */ NULL,
    /* .event_synchronize    = */ NULL,
};
457
+
458
+ // CPU backend - backend (reg)
459
+
460
+ static const char * lm_ggml_backend_cpu_reg_get_name(lm_ggml_backend_reg_t reg) {
461
+ return "CPU";
462
+
463
+ LM_GGML_UNUSED(reg);
464
+ }
465
+
466
+ static size_t lm_ggml_backend_cpu_reg_get_device_count(lm_ggml_backend_reg_t reg) {
467
+ return 1;
468
+
469
+ LM_GGML_UNUSED(reg);
470
+ }
471
+
472
+ static lm_ggml_backend_dev_t lm_ggml_backend_cpu_reg_get_device(lm_ggml_backend_reg_t reg, size_t index) {
473
+ LM_GGML_ASSERT(index == 0);
474
+
475
+ static lm_ggml_backend_cpu_device_context ctx;
476
+ static lm_ggml_backend_device lm_ggml_backend_cpu_device = {
477
+ /* .iface = */ lm_ggml_backend_cpu_device_i,
478
+ /* .reg = */ reg,
479
+ /* .context = */ &ctx,
480
+ };
481
+
482
+ return &lm_ggml_backend_cpu_device;
483
+ }
484
+
485
// This is intended to replace the lm_ggml_cpu_has_* functions when loading the CPU backend dynamically,
// and additionally to allow other backends to expose their own list of features that applications can query using the same API
static lm_ggml_backend_feature * lm_ggml_backend_cpu_get_features(lm_ggml_backend_reg_t reg) {
    // built once on first call; entries are (name, value) string pairs,
    // terminated by a {NULL, NULL} sentinel
    static std::vector<lm_ggml_backend_feature> features = []() {
        lm_ggml_cpu_init();

        std::vector<lm_ggml_backend_feature> features;
        if (lm_ggml_cpu_has_sse3()) {
            features.push_back({ "SSE3", "1" });
        }
        if (lm_ggml_cpu_has_ssse3()) {
            features.push_back({ "SSSE3", "1" });
        }
        if (lm_ggml_cpu_has_avx()) {
            features.push_back({ "AVX", "1" });
        }
        if (lm_ggml_cpu_has_avx_vnni()) {
            features.push_back({ "AVX_VNNI", "1" });
        }
        if (lm_ggml_cpu_has_avx2()) {
            features.push_back({ "AVX2", "1" });
        }
        if (lm_ggml_cpu_has_f16c()) {
            features.push_back({ "F16C", "1" });
        }
        if (lm_ggml_cpu_has_fma()) {
            features.push_back({ "FMA", "1" });
        }
        if (lm_ggml_cpu_has_bmi2()) {
            features.push_back({ "BMI2", "1" });
        }
        if (lm_ggml_cpu_has_avx512()) {
            features.push_back({ "AVX512", "1" });
        }
        if (lm_ggml_cpu_has_avx512_vbmi()) {
            features.push_back({ "AVX512_VBMI", "1" });
        }
        if (lm_ggml_cpu_has_avx512_vnni()) {
            features.push_back({ "AVX512_VNNI", "1" });
        }
        if (lm_ggml_cpu_has_avx512_bf16()) {
            features.push_back({ "AVX512_BF16", "1" });
        }
        if (lm_ggml_cpu_has_amx_int8()) {
            features.push_back({ "AMX_INT8", "1" });
        }
        if (lm_ggml_cpu_has_neon()) {
            features.push_back({ "NEON", "1" });
        }
        if (lm_ggml_cpu_has_arm_fma()) {
            features.push_back({ "ARM_FMA", "1" });
        }
        if (lm_ggml_cpu_has_fp16_va()) {
            features.push_back({ "FP16_VA", "1" });
        }
        if (lm_ggml_cpu_has_matmul_int8()) {
            features.push_back({ "MATMUL_INT8", "1" });
        }
        if (lm_ggml_cpu_has_sve()) {
            features.push_back({ "SVE", "1" });
        }
        if (lm_ggml_cpu_has_dotprod()) {
            features.push_back({ "DOTPROD", "1" });
        }
        if (lm_ggml_cpu_get_sve_cnt() > 0) {
            // static: the feature entry stores a raw pointer, so the string
            // must outlive this lambda
            static std::string sve_cnt = std::to_string(lm_ggml_cpu_get_sve_cnt());
            features.push_back({ "SVE_CNT", sve_cnt.c_str() });
        }
        if (lm_ggml_cpu_has_sme()) {
            features.push_back({ "SME", "1" });
        }
        if (lm_ggml_cpu_has_riscv_v()) {
            features.push_back({ "RISCV_V", "1" });
        }
        if (lm_ggml_cpu_has_vsx()) {
            features.push_back({ "VSX", "1" });
        }
        if (lm_ggml_cpu_has_vxe()) {
            features.push_back({ "VXE", "1" });
        }
        if (lm_ggml_cpu_has_wasm_simd()) {
            features.push_back({ "WASM_SIMD", "1" });
        }
        if (lm_ggml_cpu_has_llamafile()) {
            features.push_back({ "LLAMAFILE", "1" });
        }
    // compile-time features follow
    #ifdef LM_GGML_USE_ACCELERATE
        features.push_back({ "ACCELERATE", "1" });
    #endif
    #ifdef LM_GGML_USE_CPU_HBM
        features.push_back({ "CPU_HBM", "1" });
    #endif
    #ifdef LM_GGML_USE_OPENMP
        features.push_back({ "OPENMP", "1" });
    #endif
    #ifdef LM_GGML_USE_CPU_KLEIDIAI
        features.push_back({ "KLEIDIAI", "1" });
    #endif
    #ifdef LM_GGML_USE_CPU_AARCH64
        features.push_back({ "AARCH64_REPACK", "1" });
    #endif

        features.push_back({ nullptr, nullptr });

        return features;
    }();

    return features.data();

    LM_GGML_UNUSED(reg);
}
596
+
597
+ static void * lm_ggml_backend_cpu_get_proc_address(lm_ggml_backend_reg_t reg, const char * name) {
598
+ if (strcmp(name, "lm_ggml_backend_set_n_threads") == 0) {
599
+ lm_ggml_backend_set_n_threads_t fct = lm_ggml_backend_cpu_set_n_threads;
600
+ return (void *)fct;
601
+ }
602
+ if (strcmp(name, "lm_ggml_backend_dev_get_extra_bufts") == 0) {
603
+ lm_ggml_backend_dev_get_extra_bufts_t fct = lm_ggml_backend_cpu_device_get_extra_buffers_type;
604
+ return (void *)fct;
605
+ }
606
+ if (strcmp(name, "lm_ggml_backend_get_features") == 0) {
607
+ return (void *)lm_ggml_backend_cpu_get_features;
608
+ }
609
+ if (strcmp(name, "lm_ggml_backend_set_abort_callback") == 0) {
610
+ return (void *)lm_ggml_backend_cpu_set_abort_callback;
611
+ }
612
+ if (strcmp(name, "lm_ggml_backend_cpu_numa_init") == 0) {
613
+ return (void *)lm_ggml_numa_init;
614
+ }
615
+ if (strcmp(name, "lm_ggml_backend_cpu_is_numa") == 0) {
616
+ return (void *)lm_ggml_is_numa;
617
+ }
618
+
619
+ // threadpool - TODO: move to ggml-base
620
+ if (strcmp(name, "lm_ggml_threadpool_new") == 0) {
621
+ return (void *)lm_ggml_threadpool_new;
622
+ }
623
+ if (strcmp(name, "lm_ggml_threadpool_free") == 0) {
624
+ return (void *)lm_ggml_threadpool_free;
625
+ }
626
+ if (strcmp(name, "lm_ggml_backend_cpu_set_threadpool") == 0) {
627
+ return (void *)lm_ggml_backend_cpu_set_threadpool;
628
+ }
629
+
630
+ return NULL;
631
+
632
+ LM_GGML_UNUSED(reg);
633
+ }
634
+
635
// vtable for the CPU backend registry.
static const struct lm_ggml_backend_reg_i lm_ggml_backend_cpu_reg_i = {
    /* .get_name         = */ lm_ggml_backend_cpu_reg_get_name,
    /* .get_device_count = */ lm_ggml_backend_cpu_reg_get_device_count,
    /* .get_device       = */ lm_ggml_backend_cpu_reg_get_device,
    /* .get_proc_address = */ lm_ggml_backend_cpu_get_proc_address,
};
641
+
642
+ lm_ggml_backend_reg_t lm_ggml_backend_cpu_reg(void) {
643
+ // init CPU feature detection
644
+ lm_ggml_cpu_init();
645
+
646
+ static struct lm_ggml_backend_reg lm_ggml_backend_cpu_reg = {
647
+ /* .api_version = */ LM_GGML_BACKEND_API_VERSION,
648
+ /* .iface = */ lm_ggml_backend_cpu_reg_i,
649
+ /* .context = */ NULL,
650
+ };
651
+
652
+ return &lm_ggml_backend_cpu_reg;
653
+ }
654
+
655
+ LM_GGML_BACKEND_DL_IMPL(lm_ggml_backend_cpu_reg)