@fugood/llama.node 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. package/CMakeLists.txt +2 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +1 -1
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +8 -8
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +8 -9
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +4 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +43 -9
  25. package/src/llama.cpp/.github/workflows/docker.yml +3 -0
  26. package/src/llama.cpp/CMakeLists.txt +7 -4
  27. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  28. package/src/llama.cpp/common/CMakeLists.txt +0 -2
  29. package/src/llama.cpp/common/arg.cpp +642 -607
  30. package/src/llama.cpp/common/arg.h +22 -22
  31. package/src/llama.cpp/common/common.cpp +79 -281
  32. package/src/llama.cpp/common/common.h +130 -100
  33. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  34. package/src/llama.cpp/common/log.cpp +50 -50
  35. package/src/llama.cpp/common/log.h +18 -18
  36. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  37. package/src/llama.cpp/common/ngram-cache.h +19 -19
  38. package/src/llama.cpp/common/sampling.cpp +116 -108
  39. package/src/llama.cpp/common/sampling.h +20 -20
  40. package/src/llama.cpp/docs/build.md +37 -17
  41. package/src/llama.cpp/examples/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +14 -14
  43. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  46. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  47. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  48. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  49. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  50. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  51. package/src/llama.cpp/examples/imatrix/imatrix.cpp +20 -11
  52. package/src/llama.cpp/examples/infill/infill.cpp +40 -86
  53. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -151
  54. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  55. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  56. package/src/llama.cpp/examples/llava/clip.cpp +1 -0
  57. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  58. package/src/llama.cpp/examples/llava/llava.cpp +37 -3
  59. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  60. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  61. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  62. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  63. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +14 -14
  64. package/src/llama.cpp/examples/lookup/lookup.cpp +29 -29
  65. package/src/llama.cpp/examples/main/main.cpp +64 -109
  66. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  67. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  68. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  69. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  70. package/src/llama.cpp/examples/retrieval/retrieval.cpp +13 -13
  71. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  72. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +34 -17
  73. package/src/llama.cpp/examples/server/CMakeLists.txt +4 -13
  74. package/src/llama.cpp/examples/server/server.cpp +553 -691
  75. package/src/llama.cpp/examples/server/utils.hpp +312 -25
  76. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  77. package/src/llama.cpp/examples/simple/simple.cpp +128 -96
  78. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  79. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  80. package/src/llama.cpp/examples/speculative/speculative.cpp +54 -51
  81. package/src/llama.cpp/examples/tokenize/tokenize.cpp +2 -2
  82. package/src/llama.cpp/ggml/CMakeLists.txt +15 -9
  83. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  84. package/src/llama.cpp/ggml/include/ggml-backend.h +46 -33
  85. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  86. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  87. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  88. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  89. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  90. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  91. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  92. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  93. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  94. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  95. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  96. package/src/llama.cpp/ggml/include/ggml.h +53 -393
  97. package/src/llama.cpp/ggml/src/CMakeLists.txt +66 -1149
  98. package/src/llama.cpp/ggml/src/ggml-aarch64.c +46 -3126
  99. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  100. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -27
  101. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  102. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  103. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  104. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  105. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  106. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +6 -25
  107. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  108. package/src/llama.cpp/ggml/src/ggml-backend.cpp +303 -864
  109. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  110. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +213 -65
  111. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  112. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +255 -149
  113. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  114. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  115. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  116. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -243
  117. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  118. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  120. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  121. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +667 -1
  122. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  123. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  124. package/src/llama.cpp/ggml/src/ggml-impl.h +366 -16
  125. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  126. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +238 -72
  127. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  128. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  129. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  130. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  131. package/src/llama.cpp/ggml/src/ggml-quants.c +187 -10692
  132. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  133. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  134. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +475 -300
  135. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  136. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  137. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +40 -0
  138. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +258 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2 -22
  141. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  142. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  143. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3584 -4142
  144. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +69 -67
  145. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +3 -3
  146. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  148. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  149. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  150. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  151. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  152. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  153. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  154. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  155. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +555 -623
  156. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +125 -206
  157. package/src/llama.cpp/ggml/src/ggml.c +4032 -19890
  158. package/src/llama.cpp/include/llama.h +67 -33
  159. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  160. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  161. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  162. package/src/llama.cpp/src/llama-sampling.cpp +745 -105
  163. package/src/llama.cpp/src/llama-sampling.h +21 -2
  164. package/src/llama.cpp/src/llama-vocab.cpp +49 -9
  165. package/src/llama.cpp/src/llama-vocab.h +35 -11
  166. package/src/llama.cpp/src/llama.cpp +2636 -2406
  167. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  168. package/src/llama.cpp/tests/CMakeLists.txt +1 -2
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +14 -14
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +185 -60
  171. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  172. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  173. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  174. package/src/llama.cpp/tests/test-log.cpp +2 -2
  175. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  176. package/src/llama.cpp/tests/test-quantize-fns.cpp +22 -19
  177. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  178. package/src/llama.cpp/tests/test-rope.cpp +1 -0
  179. package/src/llama.cpp/tests/test-sampling.cpp +162 -137
  180. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  181. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  182. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  183. package/src/llama.cpp/common/train.cpp +0 -1515
  184. package/src/llama.cpp/common/train.h +0 -233
  185. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  186. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  187. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  188. package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  189. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
  190. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +0 -0
@@ -39,6 +39,8 @@
39
39
 
40
40
  #include "ggml-common.h"
41
41
 
42
+ #define GGML_CANN_NAME "CANN"
43
+
42
44
  /**
43
45
  * @brief Handles CANN errors by printing an error message and aborting.
44
46
  *
@@ -487,23 +489,6 @@ struct ggml_backend_cann_buffer_context {
487
489
  ~ggml_backend_cann_buffer_context() { ACL_CHECK(aclrtFree(dev_ptr)); }
488
490
  };
489
491
 
490
- /**
491
- * @brief Retrieve the name associated with a CANN buffer.
492
- *
493
- * This function returns the name of a CANN buffer, which is stored in the
494
- * context of the buffer.
495
- *
496
- * @param buffer The CANN buffer whose name is to be retrieved.
497
- * @return A pointer to a C-string containing the name of the buffer.
498
- */
499
-
500
- static const char* ggml_backend_cann_buffer_get_name(
501
- ggml_backend_buffer_t buffer) {
502
- return "CANN";
503
-
504
- GGML_UNUSED(buffer);
505
- }
506
-
507
492
  /**
508
493
  * @brief Check if a buffer is a CANN buffer.
509
494
  *
@@ -513,9 +498,10 @@ static const char* ggml_backend_cann_buffer_get_name(
513
498
  * @param buffer The buffer to check.
514
499
  * @return true if the buffer is a CANN buffer, false otherwise.
515
500
  */
501
+ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft);
516
502
  static bool ggml_backend_buffer_is_cann(
517
503
  ggml_backend_buffer_t buffer) {
518
- return buffer->iface.get_name == ggml_backend_cann_buffer_get_name;
504
+ return ggml_backend_buft_is_cann(buffer->buft);
519
505
  }
520
506
 
521
507
  /**
@@ -851,13 +837,6 @@ static void ggml_backend_cann_buffer_set_tensor(
851
837
  void *transform_buffer = malloc(size);
852
838
  ggml_backend_cann_transform(tensor, data, transform_buffer);
853
839
 
854
- #ifndef NDEBUG
855
- void *check_buffer = malloc(size);
856
- ggml_backend_cann_transform_back(tensor, transform_buffer,
857
- check_buffer);
858
- GGML_ASSERT(memcmp(data, check_buffer, size) == 0);
859
- free(check_buffer);
860
- #endif
861
840
  ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
862
841
  transform_buffer, size,
863
842
  ACL_MEMCPY_HOST_TO_DEVICE));
@@ -969,8 +948,7 @@ static void ggml_backend_cann_buffer_clear(
969
948
  * This structure defines function pointers to operations that can be performed
970
949
  * on a CANN buffer within the backend.
971
950
  */
972
- static ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
973
- /* .get_name = */ ggml_backend_cann_buffer_get_name,
951
+ static const ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
974
952
  /* .free_buffer = */ ggml_backend_cann_buffer_free_buffer,
975
953
  /* .get_base = */ ggml_backend_cann_buffer_get_base,
976
954
  /* .init_tensor = */ ggml_backend_cann_buffer_init_tensor,
@@ -1004,9 +982,10 @@ struct ggml_backend_cann_buffer_type_context {
1004
982
  */
1005
983
  static const char* ggml_backend_cann_buffer_type_name(
1006
984
  ggml_backend_buffer_type_t buft) {
1007
- return "CANN";
985
+ ggml_backend_cann_buffer_type_context* buft_ctx =
986
+ (ggml_backend_cann_buffer_type_context*)buft->context;
1008
987
 
1009
- GGML_UNUSED(buft);
988
+ return buft_ctx->name.c_str();
1010
989
  }
1011
990
 
1012
991
  /**
@@ -1105,19 +1084,25 @@ static size_t ggml_backend_cann_buffer_type_get_alloc_size(
1105
1084
  GGML_UNUSED(buft);
1106
1085
  }
1107
1086
 
1087
+ static bool ggml_backend_cann_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
1088
+ return false;
1089
+
1090
+ GGML_UNUSED(buft);
1091
+ }
1092
+
1108
1093
  /**
1109
1094
  * @brief Interface for managing CANN buffer types in the GGML backend.
1110
1095
  *
1111
1096
  * Provides function pointers for allocating, querying properties, and managing
1112
1097
  * memory for CANN buffer types in the GGML backend.
1113
1098
  */
1114
- static ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
1099
+ static const ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
1115
1100
  /* .get_name = */ ggml_backend_cann_buffer_type_name,
1116
1101
  /* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
1117
1102
  /* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
1118
1103
  /* .get_max_size = */ NULL, // defaults to SIZE_MAX
1119
1104
  /* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
1120
- /* .is_host = */ NULL,
1105
+ /* .is_host = */ ggml_backend_cann_buffer_type_is_host,
1121
1106
  };
1122
1107
 
1123
1108
  /**
@@ -1148,6 +1133,7 @@ ggml_backend_cann_buffer_type(int32_t device) {
1148
1133
  for (int32_t i = 0; i < GGML_CANN_MAX_DEVICES; i++) {
1149
1134
  ggml_backend_cann_buffer_types[i] = {
1150
1135
  /* .iface = */ ggml_backend_cann_buffer_type_interface,
1136
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), device),
1151
1137
  /* .context = */
1152
1138
  new ggml_backend_cann_buffer_type_context{
1153
1139
  i, "CANN" + std::to_string(i)},
@@ -1241,7 +1227,6 @@ static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggm
1241
1227
 
1242
1228
  ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(hostPtr, size);
1243
1229
  buffer->buft = buft;
1244
- buffer->iface.get_name = ggml_backend_cann_host_buffer_name;
1245
1230
  buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free;
1246
1231
 
1247
1232
  return buffer;
@@ -1263,7 +1248,7 @@ ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
1263
1248
  /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
1264
1249
  /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
1265
1250
  },
1266
- /* .device = */ nullptr,
1251
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),
1267
1252
  /* .context = */ nullptr,
1268
1253
  };
1269
1254
 
@@ -1463,24 +1448,6 @@ static void ggml_backend_cann_free(ggml_backend_t backend) {
1463
1448
  delete backend;
1464
1449
  }
1465
1450
 
1466
- /**
1467
- * @brief Retrieves the default buffer type associated with the CANN backend.
1468
- *
1469
- * This function returns the buffer type specific to the device associated
1470
- * with the CANN backend. It is used to allocate buffers for computations
1471
- * performed by the backend.
1472
- *
1473
- * @param backend Pointer to the CANN backend structure.
1474
- * @return Pointer to the buffer type structure for the CANN backend.
1475
- */
1476
- static ggml_backend_buffer_type_t
1477
- ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) {
1478
- ggml_backend_cann_context* cann_ctx =
1479
- (ggml_backend_cann_context*)backend->context;
1480
-
1481
- return ggml_backend_cann_buffer_type(cann_ctx->device);
1482
- }
1483
-
1484
1451
  /**
1485
1452
  * @brief Sets tensor data asynchronously in the CANN backend.
1486
1453
  *
@@ -1510,13 +1477,6 @@ static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
1510
1477
  void *transform_buffer = malloc(size);
1511
1478
  ggml_backend_cann_transform(tensor, data, transform_buffer);
1512
1479
 
1513
- #ifndef NDEBUG
1514
- void *check_buffer = malloc(size);
1515
- ggml_backend_cann_transform_back(tensor, transform_buffer,
1516
- check_buffer);
1517
- GGML_ASSERT(memcmp(data, check_buffer, size));
1518
- free(check_buffer);
1519
- #endif
1520
1480
  ACL_CHECK(aclrtMemcpyAsync(
1521
1481
  (char *)tensor->data + offset, size, transform_buffer, size,
1522
1482
  ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
@@ -1691,7 +1651,7 @@ static enum ggml_status ggml_backend_cann_graph_compute(
1691
1651
  * @return bool Returns true if the operation is supported by the backend,
1692
1652
  * otherwise false.
1693
1653
  */
1694
- static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
1654
+ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
1695
1655
  const ggml_tensor* op) {
1696
1656
  switch (op->op) {
1697
1657
  case GGML_OP_UNARY:
@@ -1782,7 +1742,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
1782
1742
  return false;
1783
1743
  }
1784
1744
 
1785
- GGML_UNUSED(backend);
1745
+ GGML_UNUSED(dev);
1786
1746
  }
1787
1747
 
1788
1748
  /**
@@ -1800,31 +1760,6 @@ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) {
1800
1760
  return buft->iface.get_name == ggml_backend_cann_buffer_type_name;
1801
1761
  }
1802
1762
 
1803
- /**
1804
- * @brief Checks if the CANN backend supports a specific backend buffer type.
1805
- *
1806
- * This function determines whether the CANN backend supports the given backend
1807
- * buffer type by comparing the device context of the backend and buffer type.
1808
- * It returns true if the devices are same between the backend context and
1809
- * buffer type context.
1810
- *
1811
- * @param backend Pointer to the CANN backend.
1812
- * @param buft Pointer to the backend buffer type to check.
1813
- * @return bool Returns true if the CANN backend supports the buffer type,
1814
- * otherwise false.
1815
- */
1816
- static bool ggml_backend_cann_supports_buft(
1817
- ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
1818
- if (ggml_backend_buft_is_cann(buft)) {
1819
- ggml_backend_cann_context * cann_ctx =
1820
- (ggml_backend_cann_context *)backend->context;
1821
- ggml_backend_cann_buffer_type_context * buft_ctx =
1822
- (ggml_backend_cann_buffer_type_context *)buft->context;
1823
- return buft_ctx->device == cann_ctx->device;
1824
- }
1825
- return false;
1826
- }
1827
-
1828
1763
  /**
1829
1764
  * @brief Determines if a tensor operation should be offloaded to the CANN
1830
1765
  * backend.
@@ -1839,54 +1774,14 @@ static bool ggml_backend_cann_supports_buft(
1839
1774
  * @return bool Returns true if the operation should be offloaded, otherwise
1840
1775
  * false.
1841
1776
  */
1842
- static bool ggml_backend_cann_offload_op(ggml_backend_t backend,
1777
+ static bool ggml_backend_cann_offload_op(ggml_backend_dev_t dev,
1843
1778
  const ggml_tensor* op) {
1844
1779
  const int min_batch_size = 32;
1845
- GGML_UNUSED(backend);
1780
+ GGML_UNUSED(dev);
1846
1781
 
1847
1782
  return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
1848
1783
  }
1849
1784
 
1850
- /**
1851
- * @brief Creates a new event for the CANN backend.
1852
- *
1853
- * This function initializes a new event for the CANN backend by setting the
1854
- * device and creating an ACL runtime event. The created event is then wrapped
1855
- * in a ggml_backend_event structure and returned.
1856
- *
1857
- * @param backend Pointer to the CANN backend.
1858
- * @return ggml_backend_event_t Returns a pointer to the new event structure.
1859
- */
1860
- static ggml_backend_event_t ggml_backend_cann_event_new(
1861
- ggml_backend_t backend) {
1862
- ggml_backend_cann_context* cann_ctx =
1863
- (ggml_backend_cann_context*)backend->context;
1864
-
1865
- ggml_cann_set_device(cann_ctx->device);
1866
-
1867
- aclrtEvent event;
1868
- ACL_CHECK(aclrtCreateEvent(&event));
1869
-
1870
- return new ggml_backend_event{
1871
- /* .backend = */ backend,
1872
- /* .context = */ event,
1873
- };
1874
- }
1875
-
1876
- /**
1877
- * @brief Frees a CANN backend event.
1878
- *
1879
- * This function destroys the ACL runtime event associated with the given CANN
1880
- * backend event and then deletes the event structure itself.
1881
- *
1882
- * @param event Pointer to the event structure to be freed.
1883
- */
1884
- static void ggml_backend_cann_event_free(ggml_backend_event_t event) {
1885
- ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context));
1886
-
1887
- delete event;
1888
- }
1889
-
1890
1785
  /**
1891
1786
  * @brief Records an event on the CANN backend stream.
1892
1787
  *
@@ -1895,10 +1790,9 @@ static void ggml_backend_cann_event_free(ggml_backend_event_t event) {
1895
1790
  *
1896
1791
  * @param event Pointer to the event structure to be recorded.
1897
1792
  */
1898
- static void ggml_backend_cann_event_record(ggml_backend_event_t event) {
1793
+ static void ggml_backend_cann_event_record(ggml_backend_t backend, ggml_backend_event_t event) {
1899
1794
  ggml_backend_cann_context* cann_ctx =
1900
- (ggml_backend_cann_context*)event->backend->context;
1901
-
1795
+ (ggml_backend_cann_context*)backend->context;
1902
1796
  ACL_CHECK(aclrtRecordEvent((aclrtEvent)event->context, cann_ctx->stream()));
1903
1797
  }
1904
1798
 
@@ -1916,8 +1810,7 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
1916
1810
  ggml_backend_event_t event) {
1917
1811
  ggml_backend_cann_context* cann_ctx =
1918
1812
  (ggml_backend_cann_context*)backend->context;
1919
-
1920
- if (ggml_backend_is_cann(event->backend)) {
1813
+ if (ggml_backend_is_cann(backend)) {
1921
1814
  ACL_CHECK(aclrtStreamWaitEvent(cann_ctx->stream(),
1922
1815
  (aclrtEvent)event->context));
1923
1816
  } else {
@@ -1925,17 +1818,6 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
1925
1818
  }
1926
1819
  }
1927
1820
 
1928
- /**
1929
- * @brief Synchronizes the given event on the CANN backend.
1930
- *
1931
- * This function waits for the specified event to complete on the ACL runtime.
1932
- *
1933
- * @param event Pointer to the event structure to be synchronized.
1934
- */
1935
- static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) {
1936
- ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context));
1937
- }
1938
-
1939
1821
  /**
1940
1822
  * @brief Structure defining the interface for the CANN backend.
1941
1823
  *
@@ -1943,10 +1825,9 @@ static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) {
1943
1825
  * supported by the CANN backend, including name retrieval, memory
1944
1826
  * management, tensor operations, synchronization, and event handling.
1945
1827
  */
1946
- static ggml_backend_i ggml_backend_cann_interface = {
1828
+ static const ggml_backend_i ggml_backend_cann_interface = {
1947
1829
  /* .get_name = */ ggml_backend_cann_name,
1948
1830
  /* .free = */ ggml_backend_cann_free,
1949
- /* .get_default_buffer_type = */ ggml_backend_cann_get_default_buffer_type,
1950
1831
  /* .set_tensor_async = */ ggml_backend_cann_set_tensor_async,
1951
1832
  /* .get_tensor_async = */ ggml_backend_cann_get_tensor_async,
1952
1833
  /* .cpy_tensor_async = */ ggml_backend_cann_cpy_tensor_async,
@@ -1956,9 +1837,6 @@ static ggml_backend_i ggml_backend_cann_interface = {
1956
1837
  /* .graph_plan_update = */ NULL,
1957
1838
  /* .graph_plan_compute = */ NULL,
1958
1839
  /* .graph_compute = */ ggml_backend_cann_graph_compute,
1959
- /* .supports_op = */ ggml_backend_cann_supports_op,
1960
- /* .supports_buft = */ ggml_backend_cann_supports_buft,
1961
- /* .offload_op = */ ggml_backend_cann_offload_op,
1962
1840
  /* .event_record = */ ggml_backend_cann_event_record,
1963
1841
  /* .event_wait = */ ggml_backend_cann_event_wait,
1964
1842
  };
@@ -1977,6 +1855,234 @@ static ggml_guid_t ggml_backend_cann_guid() {
1977
1855
  return &guid;
1978
1856
  }
1979
1857
 
1858
+ // backend device
1859
+ struct ggml_backend_cann_device_context {
1860
+ int device;
1861
+ std::string name;
1862
+ std::string description;
1863
+ };
1864
+
1865
+ static const char * ggml_backend_cann_device_get_name(ggml_backend_dev_t dev) {
1866
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
1867
+ return ctx->name.c_str();
1868
+ }
1869
+
1870
+ static const char* ggml_backend_cann_device_get_description(ggml_backend_dev_t dev) {
1871
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
1872
+ return ctx->description.c_str();
1873
+ }
1874
+
1875
+ static void ggml_backend_cann_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
1876
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
1877
+ ggml_backend_cann_get_device_memory(ctx->device, free, total);
1878
+ }
1879
+
1880
+ static enum ggml_backend_dev_type ggml_backend_cann_device_get_type(ggml_backend_dev_t dev) {
1881
+ GGML_UNUSED(dev);
1882
+ return GGML_BACKEND_DEVICE_TYPE_GPU;
1883
+ }
1884
+
1885
+ static void ggml_backend_cann_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
1886
+ props->name = ggml_backend_cann_device_get_name(dev);
1887
+ props->description = ggml_backend_cann_device_get_description(dev);
1888
+ props->type = ggml_backend_cann_device_get_type(dev);
1889
+ ggml_backend_cann_device_get_memory(dev, &props->memory_free, &props->memory_total);
1890
+
1891
+ bool host_buffer = getenv("GGML_CANN_NO_PINNED") == nullptr;
1892
+
1893
+ props->caps = {
1894
+ /* .async = */ false,
1895
+ /* .host_buffer = */ host_buffer,
1896
+ /* .buffer_from_host_ptr = */ false,
1897
+ /* .events = */ true,
1898
+ };
1899
+ }
1900
+
1901
+ static ggml_backend_t ggml_backend_cann_device_init(ggml_backend_dev_t dev, const char * params) {
1902
+ GGML_UNUSED(params);
1903
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
1904
+ return ggml_backend_cann_init(ctx->device);
1905
+ }
1906
+
1907
+ /**
1908
+ * @brief Checks if the CANN backend supports a specific backend buffer type.
1909
+ *
1910
+ * This function determines whether the CANN backend device supports the given
1911
+ * backend buffer type by comparing the device of the device context with the
1912
+ * device of the buffer type context. It returns true if the buffer type is a
1913
+ * CANN buffer type and both devices are the same, otherwise false.
1914
+ *
1915
+ * @param dev Pointer to the CANN backend device.
1916
+ * @param buft Pointer to the backend buffer type to check.
1917
+ * @return bool Returns true if the CANN backend supports the buffer type,
1918
+ * otherwise false.
1919
+ */
1920
+ static bool ggml_backend_cann_supports_buft(
1921
+ ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
1922
+ if (ggml_backend_buft_is_cann(buft)) {
1923
+ ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
1924
+ ggml_backend_cann_buffer_type_context * buft_ctx =
1925
+ (ggml_backend_cann_buffer_type_context *)buft->context;
1926
+ return buft_ctx->device == dev_ctx->device;
1927
+ }
1928
+ return false;
1929
+ }
1930
+
1931
+ static ggml_backend_buffer_type_t ggml_backend_cann_device_get_buffer_type(ggml_backend_dev_t dev) {
1932
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
1933
+ return ggml_backend_cann_buffer_type(ctx->device);
1934
+ }
1935
+
1936
+ static ggml_backend_buffer_type_t ggml_backend_cann_device_get_host_buffer_type(ggml_backend_dev_t dev) {
1937
+ GGML_UNUSED(dev);
1938
+ return ggml_backend_cann_host_buffer_type();
1939
+ }
1940
+
1941
+ /**
1942
+ * @brief Creates a new event for the CANN backend device.
1943
+ *
1944
+ * This function initializes a new event for the CANN backend by setting the
1945
+ * device and creating an ACL runtime event. The created event is then wrapped
1946
+ * in a ggml_backend_event structure and returned.
1947
+ *
1948
+ * @param dev Pointer to the CANN backend device.
1949
+ * @return ggml_backend_event_t Returns a pointer to the new event structure.
1950
+ */
1951
+ static ggml_backend_event_t ggml_backend_cann_device_event_new(
1952
+ ggml_backend_dev_t dev) {
1953
+ ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
1954
+
1955
+ ggml_cann_set_device(dev_ctx->device);
1956
+
1957
+ aclrtEvent event;
1958
+ ACL_CHECK(aclrtCreateEvent(&event));
1959
+
1960
+ return new ggml_backend_event{
1961
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), dev_ctx->device),
1962
+ /* .context = */ event,
1963
+ };
1964
+ }
1965
+
1966
+ /**
1967
+ * @brief Frees a CANN backend event.
1968
+ *
1969
+ * This function destroys the ACL runtime event associated with the given CANN
1970
+ * backend event and then deletes the event structure itself.
1971
+ *
1972
+ * @param event Pointer to the event structure to be freed.
1973
+ */
1974
+ static void ggml_backend_cann_device_event_free(ggml_backend_dev_t dev, ggml_backend_event_t event) {
1975
+ ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context));
1976
+
1977
+ delete event;
1978
+ GGML_UNUSED(dev);
1979
+ }
1980
+
1981
+ /**
1982
+ * @brief Synchronizes the given event on the CANN backend.
1983
+ *
1984
+ * This function waits for the specified event to complete on the ACL runtime.
1985
+ *
1986
+ * @param event Pointer to the event structure to be synchronized.
1987
+ */
1988
+ static void ggml_backend_cann_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) {
1989
+ ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context));
1990
+
1991
+ GGML_UNUSED(dev);
1992
+ }
1993
+
1994
+ static const ggml_backend_device_i ggml_backend_cann_device_interface = {
1995
+ /* .get_name = */ ggml_backend_cann_device_get_name,
1996
+ /* .get_description = */ ggml_backend_cann_device_get_description,
1997
+ /* .get_memory = */ ggml_backend_cann_device_get_memory,
1998
+ /* .get_type = */ ggml_backend_cann_device_get_type,
1999
+ /* .get_props = */ ggml_backend_cann_device_get_props,
2000
+ /* .init_backend = */ ggml_backend_cann_device_init, // called for every card
2001
+ /* .get_buffer_type = */ ggml_backend_cann_device_get_buffer_type,
2002
+ /* .get_host_buffer_type = */ ggml_backend_cann_device_get_host_buffer_type,
2003
+ /* .buffer_from_host_ptr = */ NULL, // not supported for CANN
2004
+ /* .supports_op = */ ggml_backend_cann_supports_op,
2005
+ /* .supports_buft = */ ggml_backend_cann_supports_buft,
2006
+ /* .offload_op = */ ggml_backend_cann_offload_op,
2007
+ /* .event_new = */ ggml_backend_cann_device_event_new,
2008
+ /* .event_free = */ ggml_backend_cann_device_event_free,
2009
+ /* .event_synchronize = */ ggml_backend_cann_device_event_synchronize,
2010
+ };
2011
+
2012
+
2013
+ // backend reg
2014
+ struct ggml_backend_cann_reg_context {
2015
+ std::vector<ggml_backend_dev_t> devices;
2016
+ };
2017
+
2018
+ static const char * ggml_backend_cann_reg_get_name(ggml_backend_reg_t reg) {
2019
+ GGML_UNUSED(reg);
2020
+ return GGML_CANN_NAME;
2021
+ }
2022
+
2023
+ static size_t ggml_backend_cann_reg_get_device_count(ggml_backend_reg_t reg) {
2024
+ ggml_backend_cann_reg_context * ctx = (ggml_backend_cann_reg_context *)reg->context;
2025
+ return ctx->devices.size();
2026
+ }
2027
+
2028
+ static ggml_backend_dev_t ggml_backend_cann_reg_get_device(ggml_backend_reg_t reg, size_t index) {
2029
+ ggml_backend_cann_reg_context * ctx = (ggml_backend_cann_reg_context *)reg->context;
2030
+ GGML_ASSERT(index < ctx->devices.size());
2031
+ return ctx->devices[index];
2032
+ }
2033
+
2034
+ static void * ggml_backend_cann_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
2035
+ GGML_UNUSED(reg);
2036
+ GGML_UNUSED(name);
2037
+ // reserved for future use
2038
+ return nullptr;
2039
+ }
2040
+
2041
+ static const ggml_backend_reg_i ggml_backend_cann_reg_interface = {
2042
+ /* .get_name = */ ggml_backend_cann_reg_get_name,
2043
+ /* .get_device_count = */ ggml_backend_cann_reg_get_device_count,
2044
+ /* .get_device_get = */ ggml_backend_cann_reg_get_device,
2045
+ /* .get_proc_address = */ ggml_backend_cann_reg_get_proc_address,
2046
+ };
2047
+
2048
+ // backend registry; its context is initialized only once for the cann backend
2049
+ ggml_backend_reg_t ggml_backend_cann_reg() {
2050
+ static ggml_backend_reg reg;
2051
+ static bool initialized = false;
2052
+
2053
+ {
2054
+ static std::mutex mutex;
2055
+ std::lock_guard<std::mutex> lock(mutex);
2056
+ if (!initialized) {
2057
+ aclInit(nullptr);
2058
+ ggml_backend_cann_reg_context * ctx = new ggml_backend_cann_reg_context;
2059
+
2060
+ for (int i = 0; i < ggml_cann_info().device_count; i++) {
2061
+ ggml_backend_cann_device_context* dev_ctx = new ggml_backend_cann_device_context();
2062
+ dev_ctx->description = aclrtGetSocName();
2063
+ dev_ctx->device = i;
2064
+ dev_ctx->name = GGML_CANN_NAME + std::to_string(i);
2065
+ ggml_cann_set_device(i);
2066
+ ggml_backend_dev_t dev = new ggml_backend_device {
2067
+ /* .interface = */ ggml_backend_cann_device_interface,
2068
+ /* .reg = */ &reg,
2069
+ /* .context = */ dev_ctx
2070
+ };
2071
+ ctx->devices.push_back(dev);
2072
+ }
2073
+
2074
+ reg = ggml_backend_reg {
2075
+ /* .interface = */ ggml_backend_cann_reg_interface,
2076
+ /* .context = */ ctx
2077
+ };
2078
+ }
2079
+
2080
+ initialized = true;
2081
+ }
2082
+
2083
+ return &reg;
2084
+ }
2085
+
1980
2086
  ggml_backend_t ggml_backend_cann_init(int32_t device) {
1981
2087
  aclInit(nullptr);
1982
2088
  if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
@@ -1993,7 +2099,7 @@ ggml_backend_t ggml_backend_cann_init(int32_t device) {
1993
2099
  ggml_backend_t cann_backend =
1994
2100
  new ggml_backend{/* .guid = */ ggml_backend_cann_guid(),
1995
2101
  /* .interface = */ ggml_backend_cann_interface,
1996
- /* .device = */ nullptr,
2102
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), device),
1997
2103
  /* .context = */ ctx};
1998
2104
 
1999
2105
  return cann_backend;