@fugood/llama.node 0.3.13 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +1 -1
  18. package/package.json +1 -1
  19. package/src/LlamaContext.cpp +98 -76
  20. package/src/LlamaContext.h +1 -1
  21. package/src/common.hpp +1 -2
  22. package/src/llama.cpp/.github/workflows/build.yml +89 -10
  23. package/src/llama.cpp/.github/workflows/server.yml +2 -0
  24. package/src/llama.cpp/CMakeLists.txt +9 -1
  25. package/src/llama.cpp/cmake/common.cmake +2 -0
  26. package/src/llama.cpp/common/CMakeLists.txt +3 -3
  27. package/src/llama.cpp/common/arg.cpp +132 -13
  28. package/src/llama.cpp/common/chat.cpp +960 -266
  29. package/src/llama.cpp/common/chat.h +135 -0
  30. package/src/llama.cpp/common/common.cpp +33 -174
  31. package/src/llama.cpp/common/common.h +27 -67
  32. package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
  33. package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
  34. package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
  35. package/src/llama.cpp/common/ngram-cache.cpp +1 -0
  36. package/src/llama.cpp/common/sampling.cpp +45 -7
  37. package/src/llama.cpp/common/speculative.cpp +10 -9
  38. package/src/llama.cpp/common/speculative.h +1 -1
  39. package/src/llama.cpp/docs/build.md +45 -7
  40. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
  41. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +4 -2
  42. package/src/llama.cpp/examples/embedding/embedding.cpp +2 -1
  43. package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
  44. package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
  45. package/src/llama.cpp/examples/imatrix/imatrix.cpp +3 -4
  46. package/src/llama.cpp/examples/infill/infill.cpp +2 -2
  47. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
  48. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +5 -5
  49. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  50. package/src/llama.cpp/examples/llava/clip.cpp +373 -107
  51. package/src/llama.cpp/examples/llava/clip.h +19 -3
  52. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
  53. package/src/llama.cpp/examples/llava/llava.cpp +4 -2
  54. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
  55. package/src/llama.cpp/examples/lookahead/lookahead.cpp +7 -6
  56. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  57. package/src/llama.cpp/examples/main/main.cpp +79 -34
  58. package/src/llama.cpp/examples/parallel/parallel.cpp +6 -5
  59. package/src/llama.cpp/examples/passkey/passkey.cpp +15 -14
  60. package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
  61. package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
  62. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
  63. package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
  64. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
  65. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
  66. package/src/llama.cpp/examples/run/run.cpp +196 -108
  67. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
  68. package/src/llama.cpp/examples/server/server.cpp +113 -101
  69. package/src/llama.cpp/examples/server/utils.hpp +94 -105
  70. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  71. package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
  72. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  73. package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
  74. package/src/llama.cpp/examples/tts/tts.cpp +263 -151
  75. package/src/llama.cpp/ggml/CMakeLists.txt +14 -1
  76. package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
  77. package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
  78. package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
  79. package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
  80. package/src/llama.cpp/ggml/include/ggml.h +29 -1
  81. package/src/llama.cpp/ggml/src/CMakeLists.txt +15 -34
  82. package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
  83. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
  84. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
  85. package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
  86. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
  87. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -7
  88. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
  89. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +139 -16
  90. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
  91. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
  93. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
  94. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1546 -387
  95. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1645 -113
  96. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
  97. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  101. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
  102. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
  103. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
  104. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
  105. package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
  106. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
  107. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +242 -0
  108. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -6
  109. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
  110. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -138
  111. package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
  112. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
  113. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +5 -0
  114. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +2 -1
  115. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
  116. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +117 -36
  117. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
  118. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
  119. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  120. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  121. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
  122. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +147 -16
  123. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
  124. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +307 -0
  125. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
  126. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +262 -746
  127. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
  128. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -78
  129. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
  130. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
  131. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +4 -1
  132. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  133. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  134. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
  135. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  136. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +498 -188
  137. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
  138. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +16 -3
  139. package/src/llama.cpp/ggml/src/ggml.c +93 -5
  140. package/src/llama.cpp/include/llama.h +105 -27
  141. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  142. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  143. package/src/llama.cpp/requirements/requirements-all.txt +1 -0
  144. package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  145. package/src/llama.cpp/requirements.txt +1 -0
  146. package/src/llama.cpp/src/CMakeLists.txt +5 -2
  147. package/src/llama.cpp/src/llama-adapter.cpp +19 -20
  148. package/src/llama.cpp/src/llama-adapter.h +11 -9
  149. package/src/llama.cpp/src/llama-arch.cpp +123 -16
  150. package/src/llama.cpp/src/llama-arch.h +19 -0
  151. package/src/llama.cpp/src/llama-batch.h +2 -2
  152. package/src/llama.cpp/src/llama-chat.cpp +1 -0
  153. package/src/llama.cpp/src/llama-context.cpp +2253 -1222
  154. package/src/llama.cpp/src/llama-context.h +214 -77
  155. package/src/llama.cpp/src/llama-cparams.h +1 -0
  156. package/src/llama.cpp/src/llama-grammar.cpp +182 -182
  157. package/src/llama.cpp/src/llama-grammar.h +12 -3
  158. package/src/llama.cpp/src/llama-graph.cpp +1662 -0
  159. package/src/llama.cpp/src/llama-graph.h +574 -0
  160. package/src/llama.cpp/src/llama-hparams.cpp +8 -0
  161. package/src/llama.cpp/src/llama-hparams.h +9 -0
  162. package/src/llama.cpp/src/llama-io.cpp +15 -0
  163. package/src/llama.cpp/src/llama-io.h +35 -0
  164. package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
  165. package/src/llama.cpp/src/llama-kv-cache.h +178 -109
  166. package/src/llama.cpp/src/llama-memory.cpp +1 -0
  167. package/src/llama.cpp/src/llama-memory.h +21 -0
  168. package/src/llama.cpp/src/llama-mmap.cpp +11 -1
  169. package/src/llama.cpp/src/llama-model.cpp +8230 -122
  170. package/src/llama.cpp/src/llama-model.h +34 -1
  171. package/src/llama.cpp/src/llama-quant.cpp +10 -1
  172. package/src/llama.cpp/src/llama-sampling.cpp +43 -10
  173. package/src/llama.cpp/src/llama-vocab.cpp +12 -0
  174. package/src/llama.cpp/src/llama.cpp +51 -9837
  175. package/src/llama.cpp/tests/test-backend-ops.cpp +247 -112
  176. package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
  177. package/src/llama.cpp/tests/test-chat.cpp +593 -395
  178. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
  179. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
  180. package/src/llama.cpp/Sources/llama/llama.h +0 -4
  181. package/src/llama.cpp/common/chat.hpp +0 -55
  182. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
  183. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
  184. package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
package/src/llama.cpp/ggml/include/ggml-backend.h
@@ -56,7 +56,7 @@ extern "C" {
  GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
  GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
  GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
- GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+ GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
  GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
  GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
  GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
@@ -342,8 +342,8 @@ extern "C" {
  GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);

  // Tensor initialization
- GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
- GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
+ GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
+ GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);

  // CPU buffer types are always available
  GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
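These initialization helpers now report failure through enum ggml_status instead of returning void. A minimal caller-side sketch (not taken from the package; the wrapper name place_tensor is hypothetical) of how the new return value can be checked rather than ignored:

#include "ggml.h"
#include "ggml-backend.h"
#include <stdbool.h>
#include <stdio.h>

// Place a pre-created tensor at a fixed address inside an existing backend buffer
// and propagate the initialization status instead of asserting.
static bool place_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * t, void * addr) {
    enum ggml_status status = ggml_backend_tensor_alloc(buffer, t, addr);
    if (status != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "failed to place tensor %s (status %d)\n", t->name, (int) status);
        return false;
    }
    return true;
}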
package/src/llama.cpp/ggml/include/ggml-cpu.h
@@ -80,6 +80,7 @@ extern "C" {
  GGML_BACKEND_API int ggml_cpu_has_avx (void);
  GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
  GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
+ GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
  GGML_BACKEND_API int ggml_cpu_has_f16c (void);
  GGML_BACKEND_API int ggml_cpu_has_fma (void);
  GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
@@ -95,9 +96,11 @@ extern "C" {
  GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
  GGML_BACKEND_API int ggml_cpu_has_sve (void);
  GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
+ GGML_BACKEND_API int ggml_cpu_has_sme (void);
  // other
  GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
  GGML_BACKEND_API int ggml_cpu_has_vsx (void);
+ GGML_BACKEND_API int ggml_cpu_has_vxe (void);
  GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
  GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
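The new probes (BMI2 on x86, SME on Arm, VXE on s390x) follow the same 0/1 convention as the existing feature getters. A small sketch, assuming only the headers that ship with the package:

#include "ggml-cpu.h"
#include <stdio.h>

int main(void) {
    // each probe returns 1 if the feature was detected at runtime, 0 otherwise
    printf("AVX2: %d  BMI2: %d\n", ggml_cpu_has_avx2(), ggml_cpu_has_bmi2());
    printf("SVE:  %d  SME:  %d  (SVE vector length: %d bytes)\n",
           ggml_cpu_has_sve(), ggml_cpu_has_sme(), ggml_cpu_get_sve_cnt());
    printf("VSX:  %d  VXE:  %d\n", ggml_cpu_has_vsx(), ggml_cpu_has_vxe());
    return 0;
}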
package/src/llama.cpp/ggml/include/ggml.h
@@ -454,6 +454,7 @@ extern "C" {
  GGML_OP_RMS_NORM,
  GGML_OP_RMS_NORM_BACK,
  GGML_OP_GROUP_NORM,
+ GGML_OP_L2_NORM,

  GGML_OP_MUL_MAT,
  GGML_OP_MUL_MAT_ID,
@@ -502,6 +503,7 @@ extern "C" {
  GGML_OP_ADD_REL_POS,
  GGML_OP_RWKV_WKV6,
  GGML_OP_GATED_LINEAR_ATTN,
+ GGML_OP_RWKV_WKV7,

  GGML_OP_UNARY,

@@ -1095,6 +1097,18 @@ extern "C" {
  int n_groups,
  float eps);

+ // l2 normalize along rows
+ // used in rwkv v7
+ GGML_API struct ggml_tensor * ggml_l2_norm(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ float eps);
+
+ GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ float eps);
+
  // a - x
  // b - dy
  GGML_API struct ggml_tensor * ggml_rms_norm_back(
@@ -1890,6 +1904,16 @@ extern "C" {
  struct ggml_tensor * state,
  float scale);

+ GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
+ struct ggml_context * ctx,
+ struct ggml_tensor * r,
+ struct ggml_tensor * w,
+ struct ggml_tensor * k,
+ struct ggml_tensor * v,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * state);
+
  // custom operators

  typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -2140,7 +2164,11 @@ extern "C" {
  # define GGML_RESTRICT
  # endif
  #else
- # if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
- # define GGML_RESTRICT __restrict
- # else
- # define GGML_RESTRICT restrict
- # endif
  #endif
  typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
  typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
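ggml_l2_norm, the row-wise normalization that backs the new RWKV v7 path, slots into a graph like any other unary-style op. A minimal sketch, assuming the usual CPU compute helper from ggml-cpu.h; the tensor shape and eps value are illustrative only, not taken from the package:

#include "ggml.h"
#include "ggml-cpu.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = { /*.mem_size =*/ 16*1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 2); // 4 columns, 2 rows
    struct ggml_tensor * y = ggml_l2_norm(ctx, x, 1e-12f);                 // L2-normalize each row

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y);

    // fill x with some values, then evaluate on the CPU
    for (int i = 0; i < 8; ++i) { ((float *) x->data)[i] = (float) (i + 1); }
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    printf("y[0] = %f\n", ((float *) y->data)[0]);
    ggml_free(ctx);
    return 0;
}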
package/src/llama.cpp/ggml/src/CMakeLists.txt
@@ -1,4 +1,5 @@
  include(CheckCXXCompilerFlag)
+ include("../cmake/common.cmake")

  add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})

@@ -24,33 +25,6 @@ if (NOT MSVC)
  endif()
  endif()

- function(ggml_get_flags CCID CCVER)
- set(C_FLAGS "")
- set(CXX_FLAGS "")
-
- if (CCID MATCHES "Clang")
- set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
- set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
-
- if (
- (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
- (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
- )
- list(APPEND C_FLAGS -Wdouble-promotion)
- endif()
- elseif (CCID STREQUAL "GNU")
- set(C_FLAGS -Wdouble-promotion)
- set(CXX_FLAGS -Wno-array-bounds)
-
- if (CCVER VERSION_GREATER_EQUAL 8.1.0)
- list(APPEND CXX_FLAGS -Wextra-semi)
- endif()
- endif()
-
- set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
- set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
- endfunction()
-
  if (GGML_FATAL_WARNINGS)
  if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  list(APPEND C_FLAGS -Werror)
@@ -226,6 +200,9 @@ add_library(ggml-base
  gguf.cpp)

  target_include_directories(ggml-base PRIVATE .)
+ if (GGML_BACKEND_DL)
+ target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
+ endif()

  add_library(ggml
  ggml-backend-reg.cpp)
@@ -233,7 +210,7 @@ add_library(ggml
  target_link_libraries(ggml PUBLIC ggml-base)

  if (CMAKE_SYSTEM_NAME MATCHES "Linux")
- target_link_libraries(ggml PRIVATE dl)
+ target_link_libraries(ggml PRIVATE dl stdc++fs)
  endif()

  function(ggml_add_backend_library backend)
@@ -286,7 +263,7 @@ function(ggml_add_cpu_backend_variant tag_name)
  set(GGML_CPU_TAG_NAME ${tag_name})
  # other: OPENMP LLAMAFILE CPU_HBM
  foreach (feat NATIVE
- AVX AVX2 AVX_VNNI FMA F16C
+ AVX AVX2 BMI2 AVX_VNNI FMA F16C
  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
  AMX_TILE AMX_INT8 AMX_BF16)
  set(GGML_${feat} OFF)
@@ -306,13 +283,13 @@ if (GGML_CPU_ALL_VARIANTS)
  message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
  endif()
  ggml_add_cpu_backend_variant(sandybridge AVX)
- ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
- ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
- ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
- ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
+ ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA)
+ ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512)
+ ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
+ ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI)
  if (NOT MSVC)
  # MSVC doesn't support AMX
- ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+ ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
  endif()
  elseif (GGML_CPU)
  ggml_add_cpu_backend_variant_impl("")
@@ -348,6 +325,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Android")
  target_link_libraries(ggml-base PRIVATE dl)
  endif()

+ if(CMAKE_SYSTEM_NAME MATCHES "visionOS")
+ target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE)
+ endif()
+
  if (BUILD_SHARED_LIBS)
  foreach (target ggml-base ggml)
  set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
package/src/llama.cpp/ggml/src/ggml-alloc.c
@@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
  return talloc;
  }

- void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
+ enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
  size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
  size = GGML_PAD(size, talloc->alignment);

@@ -104,7 +104,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso

  assert(((uintptr_t)addr % talloc->alignment) == 0);

- ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
+ return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
  }

  // dynamic tensor allocator
@@ -933,42 +933,51 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {

  // utils

+ static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
+ for (size_t i = 0; i < *n_buffers; i++) {
+ ggml_backend_buffer_free((*buffers)[i]);
+ }
+ free(*buffers);
+ }
+
  static bool alloc_tensor_range(struct ggml_context * ctx,
  struct ggml_tensor * first, struct ggml_tensor * last,
  ggml_backend_buffer_type_t buft, size_t size,
  ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
+
  ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
  if (buffer == NULL) {
- #ifndef NDEBUG
- GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
- #endif
- for (size_t i = 0; i < *n_buffers; i++) {
- ggml_backend_buffer_free((*buffers)[i]);
- }
- free(*buffers);
+ GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
+ free_buffers(buffers, n_buffers);
  return false;
  }

+ *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
+ (*buffers)[(*n_buffers)++] = buffer;
+
  struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);

  for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
+ enum ggml_status status = GGML_STATUS_SUCCESS;
  if (t->data == NULL) {
  if (t->view_src == NULL) {
- ggml_tallocr_alloc(&tallocr, t);
+ status = ggml_tallocr_alloc(&tallocr, t);
  } else if (t->buffer == NULL) {
- ggml_backend_view_init(t);
+ status = ggml_backend_view_init(t);
  }
  } else {
  if (t->view_src != NULL && t->buffer == NULL) {
  // view of a pre-allocated tensor
- ggml_backend_view_init(t);
+ status = ggml_backend_view_init(t);
  }
  }
+ if (status != GGML_STATUS_SUCCESS) {
+ GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
+ free_buffers(buffers, n_buffers);
+ return false;
+ }
  }

- *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
- (*buffers)[(*n_buffers)++] = buffer;
-
  return true;
  }
package/src/llama.cpp/ggml/src/ggml-backend-impl.h
@@ -44,7 +44,7 @@ extern "C" {
  // base address of the buffer
  void * (*get_base) (ggml_backend_buffer_t buffer);
  // (optional) initialize a tensor in the buffer (eg. add tensor extras)
- void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+ enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
  // tensor data access
  void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
  void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp
@@ -2,14 +2,13 @@
  #include "ggml-backend.h"
  #include "ggml-impl.h"
  #include <algorithm>
- #include <codecvt>
  #include <cstring>
  #include <filesystem>
- #include <locale>
  #include <memory>
  #include <string>
  #include <type_traits>
  #include <vector>
+ #include <cctype>

  #ifdef _WIN32
  # define WIN32_LEAN_AND_MEAN
@@ -72,14 +71,22 @@
  # pragma clang diagnostic ignored "-Wdeprecated-declarations"
  #endif

- static std::wstring utf8_to_utf16(const std::string & str) {
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
- return converter.from_bytes(str);
- }
+ namespace fs = std::filesystem;

- static std::string utf16_to_utf8(const std::wstring & str) {
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
- return converter.to_bytes(str);
+ static std::string path_str(const fs::path & path) {
+ std::string u8path;
+ try {
+ #if defined(__cpp_lib_char8_t)
+ // C++20 and later: u8string() returns std::u8string
+ std::u8string u8str = path.u8string();
+ u8path = std::string(reinterpret_cast<const char*>(u8str.c_str()));
+ #else
+ // C++17: u8string() returns std::string
+ u8path = path.u8string();
+ #endif
+ } catch (...) {
+ }
+ return u8path;
  }

  #if defined(__clang__)
@@ -96,12 +103,12 @@ struct dl_handle_deleter {
  }
  };

- static dl_handle * dl_load_library(const std::wstring & path) {
+ static dl_handle * dl_load_library(const fs::path & path) {
  // suppress error dialogs for missing DLLs
  DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
  SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

- HMODULE handle = LoadLibraryW(path.c_str());
+ HMODULE handle = LoadLibraryW(path.wstring().c_str());

  SetErrorMode(old_mode);

@@ -129,8 +136,8 @@ struct dl_handle_deleter {
  }
  };

- static void * dl_load_library(const std::wstring & path) {
- dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
+ static void * dl_load_library(const fs::path & path) {
+ dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);

  return handle;
  }
@@ -217,11 +224,11 @@ struct ggml_backend_registry {
  devices.push_back(device);
  }

- ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
+ ggml_backend_reg_t load_backend(const fs::path & path, bool silent) {
  dl_handle_ptr handle { dl_load_library(path) };
  if (!handle) {
  if (!silent) {
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(path).c_str());
  }
  return nullptr;
  }
@@ -229,7 +236,7 @@ struct ggml_backend_registry {
  auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
  if (score_fn && score_fn() == 0) {
  if (!silent) {
- GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
+ GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str());
  }
  return nullptr;
  }
@@ -237,7 +244,7 @@ struct ggml_backend_registry {
  auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
  if (!backend_init_fn) {
  if (!silent) {
- GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
+ GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path_str(path).c_str());
  }
  return nullptr;
  }
@@ -246,16 +253,17 @@ struct ggml_backend_registry {
  if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
  if (!silent) {
  if (!reg) {
- GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n",
+ __func__, path_str(path).c_str());
  } else {
  GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
- __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
+ __func__, path_str(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
  }
  }
  return nullptr;
  }

- GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
+ GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path_str(path).c_str());

  register_backend(reg, std::move(handle));

@@ -391,14 +399,14 @@ ggml_backend_t ggml_backend_init_best(void) {

  // Dynamic loading
  ggml_backend_reg_t ggml_backend_load(const char * path) {
- return get_reg().load_backend(utf8_to_utf16(path), false);
+ return get_reg().load_backend(path, false);
  }

  void ggml_backend_unload(ggml_backend_reg_t reg) {
  get_reg().unload_backend(reg, true);
  }

- static std::wstring get_executable_path() {
+ static fs::path get_executable_path() {
  #if defined(__APPLE__)
  // get executable path
  std::vector<char> path;
@@ -416,7 +424,7 @@ static std::wstring get_executable_path() {
  if (last_slash != std::string::npos) {
  base_path = base_path.substr(0, last_slash);
  }
- return utf8_to_utf16(base_path + "/");
+ return base_path + "/";
  #elif defined(__linux__) || defined(__FreeBSD__)
  std::string base_path = ".";
  std::vector<char> path(1024);
@@ -442,7 +450,7 @@ static std::wstring get_executable_path() {
  path.resize(path.size() * 2);
  }

- return utf8_to_utf16(base_path + "/");
+ return base_path + "/";
  #elif defined(_WIN32)
  std::vector<wchar_t> path(MAX_PATH);
  DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
@@ -461,74 +469,69 @@ static std::wstring get_executable_path() {
  #endif
  }

- static std::wstring backend_filename_prefix() {
- #ifdef _WIN32
- return L"ggml-";
- #else
- return L"libggml-";
- #endif
- }
-
- static std::wstring backend_filename_suffix() {
+ static fs::path backend_filename_prefix() {
  #ifdef _WIN32
- return L".dll";
+ return fs::u8path("ggml-");
  #else
- return L".so";
+ return fs::u8path("libggml-");
  #endif
  }

- static std::wstring path_separator() {
+ static fs::path backend_filename_extension() {
  #ifdef _WIN32
- return L"\\";
+ return fs::u8path(".dll");
  #else
- return L"/";
+ return fs::u8path(".so");
  #endif
  }

  static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
  // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
- // TODO: search system paths
- std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
- std::vector<std::wstring> search_paths;
+ const fs::path name_path = fs::u8path(name);
+ const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
+ const fs::path file_extension = backend_filename_extension();
+
+ std::vector<fs::path> search_paths;
  if (user_search_path == nullptr) {
- search_paths.push_back(L"." + path_separator());
+ // default search paths: executable directory, current directory
  search_paths.push_back(get_executable_path());
+ search_paths.push_back(fs::current_path());
  } else {
- search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
+ search_paths.push_back(fs::u8path(user_search_path));
  }

  int best_score = 0;
- std::wstring best_path;
+ fs::path best_path;

- namespace fs = std::filesystem;
  for (const auto & search_path : search_paths) {
  if (!fs::exists(search_path)) {
+ GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
  continue;
  }
  fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
  for (const auto & entry : dir_it) {
  if (entry.is_regular_file()) {
- std::wstring filename = entry.path().filename().wstring();
- std::wstring ext = entry.path().extension().wstring();
- if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
- dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
+ auto filename = entry.path().filename();
+ auto ext = entry.path().extension();
+ if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
+ dl_handle_ptr handle { dl_load_library(entry) };
  if (!handle && !silent) {
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(entry.path()).c_str());
  }
  if (handle) {
  auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
  if (score_fn) {
  int s = score_fn();
  #ifndef NDEBUG
- GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
+ GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s);
  #endif
  if (s > best_score) {
  best_score = s;
- best_path = entry.path().wstring();
+ best_path = entry.path();
  }
  } else {
  if (!silent) {
- GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+ GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str());
  }
  }
  }
@@ -540,7 +543,8 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
  if (best_score == 0) {
  // try to load the base backend
  for (const auto & search_path : search_paths) {
- std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
+ fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
+ fs::path path = search_path / filename;
  if (fs::exists(path)) {
  return get_reg().load_backend(path, silent);
  }
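With the std::filesystem rework, dynamic backends are discovered as [lib]ggml-<name>-*.so/.dll in the executable's directory or the current directory, and ggml_backend_load() takes a plain UTF-8 path. A small sketch of loading one backend explicitly (the library path shown is hypothetical):

#include "ggml-backend.h"
#include <stdio.h>

int main(void) {
    // load a single backend by path; NULL is returned on failure
    ggml_backend_reg_t reg = ggml_backend_load("./libggml-cpu.so");
    if (reg == NULL) {
        fprintf(stderr, "backend could not be loaded\n");
        return 1;
    }
    printf("loaded backend: %s\n", ggml_backend_reg_name(reg));
    ggml_backend_unload(reg);
    return 0;
}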
package/src/llama.cpp/ggml/src/ggml-backend.cpp
@@ -21,6 +21,7 @@
  #include <string.h>
  #include <string>
  #include <vector>
+ #include <algorithm>

  #ifdef __APPLE__
  #include <sys/types.h>
@@ -126,11 +127,12 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
  return base;
  }

- void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+ enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
  // init_tensor is optional
  if (buffer->iface.init_tensor) {
- buffer->iface.init_tensor(buffer, tensor);
+ return buffer->iface.init_tensor(buffer, tensor);
  }
+ return GGML_STATUS_SUCCESS;
  }

  void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
@@ -1641,7 +1643,7 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,

  // utils

- void ggml_backend_view_init(struct ggml_tensor * tensor) {
+ enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
  GGML_ASSERT(tensor->buffer == NULL);
  GGML_ASSERT(tensor->view_src != NULL);
  GGML_ASSERT(tensor->view_src->buffer != NULL);
@@ -1649,10 +1651,10 @@ void ggml_backend_view_init(struct ggml_tensor * tensor) {

  tensor->buffer = tensor->view_src->buffer;
  tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
- ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
+ return ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
  }

- void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
+ enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
  GGML_ASSERT(tensor->buffer == NULL);
  GGML_ASSERT(tensor->data == NULL);
  GGML_ASSERT(tensor->view_src == NULL);
@@ -1662,7 +1664,7 @@ void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor

  tensor->buffer = buffer;
  tensor->data = addr;
- ggml_backend_buffer_init_tensor(buffer, tensor);
+ return ggml_backend_buffer_init_tensor(buffer, tensor);
  }

  static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
@@ -1708,7 +1710,8 @@ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_
  struct ggml_tensor * dst = node_copies[id];
  if (dst->view_src != NULL) {
  graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
- ggml_backend_view_init(dst);
+ enum ggml_status status = ggml_backend_view_init(dst);
+ GGML_ASSERT(status == GGML_STATUS_SUCCESS);
  }
  else {
  ggml_backend_tensor_copy(src, dst);
@@ -1823,7 +1826,6 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
  assert(g1->n_nodes == g2->n_nodes);

  for (int i = 0; i < g1->n_nodes; i++) {
- //printf("eval %d/%d\n", i, g1->n_nodes);
  struct ggml_tensor * t1 = g1->nodes[i];
  struct ggml_tensor * t2 = g2->nodes[i];
package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -2790,10 +2790,14 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
  (char*)output_buffer + batch1 * output_stride, ACL_FLOAT16,
  output_elem_size, output_ne, output_nb, 2, ACL_FORMAT_ND,
  output_ne_offset);
+ int64_t antiquantGroupSize = 0;
+ if (src0->ne[0] > QK8_0) {
+ antiquantGroupSize = QK8_0;
+ }

  ACL_CHECK(aclnnWeightQuantBatchMatmulV2GetWorkspaceSize(
  acl_input_tensor, acl_weight_tensor, acl_scale_tensor, nullptr,
- nullptr, nullptr, nullptr, QK8_0, acl_output_tensor,
+ nullptr, nullptr, nullptr, antiquantGroupSize, acl_output_tensor,
  &workspaceSize, &executor));
  if (workspaceAddr == nullptr) {
  workspaceAddr = workspace_allocator.alloc(workspaceSize);
@@ -2833,7 +2837,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,

  ACL_CHECK(aclnnWeightQuantBatchMatmulV2GetWorkspaceSize(
  acl_input_tensor, acl_weight_tensor, acl_scale_tensor,
- nullptr, nullptr, nullptr, nullptr, QK8_0,
+ nullptr, nullptr, nullptr, nullptr, antiquantGroupSize,
  acl_output_tensor, &workspaceSize, &executor));
  ACL_CHECK(aclnnWeightQuantBatchMatmulV2(