@fugood/llama.node 0.3.12 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +2 -1
  18. package/package.json +1 -1
  19. package/src/LlamaCompletionWorker.cpp +14 -0
  20. package/src/LlamaContext.cpp +110 -79
  21. package/src/LlamaContext.h +1 -1
  22. package/src/common.hpp +1 -2
  23. package/src/llama.cpp/.github/workflows/build.yml +95 -13
  24. package/src/llama.cpp/.github/workflows/docker.yml +2 -0
  25. package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
  26. package/src/llama.cpp/.github/workflows/server.yml +2 -0
  27. package/src/llama.cpp/common/CMakeLists.txt +23 -6
  28. package/src/llama.cpp/common/arg.cpp +292 -14
  29. package/src/llama.cpp/common/chat.cpp +1128 -315
  30. package/src/llama.cpp/common/chat.h +135 -0
  31. package/src/llama.cpp/common/common.cpp +27 -171
  32. package/src/llama.cpp/common/common.h +41 -73
  33. package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
  34. package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
  35. package/src/llama.cpp/common/llguidance.cpp +3 -3
  36. package/src/llama.cpp/common/log.cpp +1 -0
  37. package/src/llama.cpp/common/log.h +2 -1
  38. package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +21 -7
  39. package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +61 -14
  40. package/src/llama.cpp/common/ngram-cache.cpp +1 -0
  41. package/src/llama.cpp/common/sampling.cpp +93 -49
  42. package/src/llama.cpp/common/speculative.cpp +6 -5
  43. package/src/llama.cpp/common/speculative.h +1 -1
  44. package/src/llama.cpp/docs/build.md +47 -9
  45. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
  46. package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
  47. package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
  48. package/src/llama.cpp/examples/imatrix/imatrix.cpp +4 -4
  49. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
  50. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
  51. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
  52. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  53. package/src/llama.cpp/examples/llava/clip.cpp +373 -107
  54. package/src/llama.cpp/examples/llava/clip.h +19 -3
  55. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
  56. package/src/llama.cpp/examples/llava/llava.cpp +4 -2
  57. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
  58. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
  59. package/src/llama.cpp/examples/main/main.cpp +73 -28
  60. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
  61. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
  62. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
  63. package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
  64. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
  65. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
  66. package/src/llama.cpp/examples/run/run.cpp +115 -79
  67. package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/server/httplib.h +381 -292
  69. package/src/llama.cpp/examples/server/server.cpp +134 -128
  70. package/src/llama.cpp/examples/server/utils.hpp +95 -106
  71. package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
  72. package/src/llama.cpp/examples/tts/tts.cpp +251 -142
  73. package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
  74. package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
  75. package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
  76. package/src/llama.cpp/ggml/include/ggml-cpu.h +4 -1
  77. package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
  78. package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
  79. package/src/llama.cpp/ggml/include/ggml.h +6 -2
  80. package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
  81. package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
  82. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
  83. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
  84. package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
  85. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
  86. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
  87. package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
  88. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
  89. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
  90. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +156 -11
  93. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +2235 -641
  94. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1572 -198
  95. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +24 -5
  96. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
  97. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
  101. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +16 -3
  102. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
  103. package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
  104. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
  105. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
  106. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
  107. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
  108. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +246 -120
  109. package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
  110. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
  111. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
  112. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  113. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
  114. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
  115. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
  116. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
  117. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
  120. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
  121. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
  122. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
  123. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +174 -728
  124. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
  125. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
  126. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  127. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  128. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +949 -602
  129. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +37 -3
  130. package/src/llama.cpp/ggml/src/ggml.c +9 -4
  131. package/src/llama.cpp/include/llama.h +32 -14
  132. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  133. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  134. package/src/llama.cpp/requirements/requirements-all.txt +1 -0
  135. package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  136. package/src/llama.cpp/requirements.txt +1 -0
  137. package/src/llama.cpp/src/llama-arch.cpp +21 -0
  138. package/src/llama.cpp/src/llama-arch.h +1 -0
  139. package/src/llama.cpp/src/llama-chat.cpp +1 -0
  140. package/src/llama.cpp/src/llama-grammar.cpp +183 -183
  141. package/src/llama.cpp/src/llama-grammar.h +13 -4
  142. package/src/llama.cpp/src/llama-impl.h +6 -6
  143. package/src/llama.cpp/src/llama-kv-cache.h +2 -1
  144. package/src/llama.cpp/src/llama-mmap.cpp +11 -1
  145. package/src/llama.cpp/src/llama-mmap.h +1 -0
  146. package/src/llama.cpp/src/llama-model.cpp +70 -6
  147. package/src/llama.cpp/src/llama-sampling.cpp +174 -67
  148. package/src/llama.cpp/src/llama-vocab.cpp +12 -0
  149. package/src/llama.cpp/src/llama.cpp +154 -5
  150. package/src/llama.cpp/src/unicode.cpp +9 -2
  151. package/src/llama.cpp/tests/test-backend-ops.cpp +171 -115
  152. package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
  153. package/src/llama.cpp/tests/test-chat.cpp +691 -325
  154. package/src/llama.cpp/tests/test-gguf.cpp +4 -4
  155. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
  156. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
  157. package/src/llama.cpp/tests/test-sampling.cpp +15 -0
  158. package/src/llama.cpp/Sources/llama/llama.h +0 -4
  159. package/src/llama.cpp/common/chat.hpp +0 -52
@@ -226,6 +226,9 @@ add_library(ggml-base
226
226
  gguf.cpp)
227
227
 
228
228
  target_include_directories(ggml-base PRIVATE .)
229
+ if (GGML_BACKEND_DL)
230
+ target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
231
+ endif()
229
232
 
230
233
  add_library(ggml
231
234
  ggml-backend-reg.cpp)
@@ -233,7 +236,7 @@ add_library(ggml
233
236
  target_link_libraries(ggml PUBLIC ggml-base)
234
237
 
235
238
  if (CMAKE_SYSTEM_NAME MATCHES "Linux")
236
- target_link_libraries(ggml PRIVATE dl)
239
+ target_link_libraries(ggml PRIVATE dl stdc++fs)
237
240
  endif()
238
241
 
239
242
  function(ggml_add_backend_library backend)
@@ -286,7 +289,7 @@ function(ggml_add_cpu_backend_variant tag_name)
286
289
  set(GGML_CPU_TAG_NAME ${tag_name})
287
290
  # other: OPENMP LLAMAFILE CPU_HBM
288
291
  foreach (feat NATIVE
289
- AVX AVX2 AVX_VNNI FMA F16C
292
+ AVX AVX2 BMI2 AVX_VNNI FMA F16C
290
293
  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
291
294
  AMX_TILE AMX_INT8 AMX_BF16)
292
295
  set(GGML_${feat} OFF)
@@ -306,13 +309,13 @@ if (GGML_CPU_ALL_VARIANTS)
306
309
  message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
307
310
  endif()
308
311
  ggml_add_cpu_backend_variant(sandybridge AVX)
309
- ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
310
- ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
311
- ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
312
- ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
312
+ ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA)
313
+ ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512)
314
+ ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
315
+ ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI)
313
316
  if (NOT MSVC)
314
317
  # MSVC doesn't support AMX
315
- ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
318
+ ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
316
319
  endif()
317
320
  elseif (GGML_CPU)
318
321
  ggml_add_cpu_backend_variant_impl("")
@@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
89
89
  return talloc;
90
90
  }
91
91
 
92
- void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
92
+ enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
93
93
  size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
94
94
  size = GGML_PAD(size, talloc->alignment);
95
95
 
@@ -104,7 +104,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
104
104
 
105
105
  assert(((uintptr_t)addr % talloc->alignment) == 0);
106
106
 
107
- ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
107
+ return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
108
108
  }
109
109
 
110
110
  // dynamic tensor allocator
@@ -933,42 +933,51 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
933
933
 
934
934
  // utils
935
935
 
936
+ static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
937
+ for (size_t i = 0; i < *n_buffers; i++) {
938
+ ggml_backend_buffer_free((*buffers)[i]);
939
+ }
940
+ free(*buffers);
941
+ }
942
+
936
943
  static bool alloc_tensor_range(struct ggml_context * ctx,
937
944
  struct ggml_tensor * first, struct ggml_tensor * last,
938
945
  ggml_backend_buffer_type_t buft, size_t size,
939
946
  ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
947
+
940
948
  ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
941
949
  if (buffer == NULL) {
942
- #ifndef NDEBUG
943
- GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
944
- #endif
945
- for (size_t i = 0; i < *n_buffers; i++) {
946
- ggml_backend_buffer_free((*buffers)[i]);
947
- }
948
- free(*buffers);
950
+ GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
951
+ free_buffers(buffers, n_buffers);
949
952
  return false;
950
953
  }
951
954
 
955
+ *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
956
+ (*buffers)[(*n_buffers)++] = buffer;
957
+
952
958
  struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
953
959
 
954
960
  for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
961
+ enum ggml_status status = GGML_STATUS_SUCCESS;
955
962
  if (t->data == NULL) {
956
963
  if (t->view_src == NULL) {
957
- ggml_tallocr_alloc(&tallocr, t);
964
+ status = ggml_tallocr_alloc(&tallocr, t);
958
965
  } else if (t->buffer == NULL) {
959
- ggml_backend_view_init(t);
966
+ status = ggml_backend_view_init(t);
960
967
  }
961
968
  } else {
962
969
  if (t->view_src != NULL && t->buffer == NULL) {
963
970
  // view of a pre-allocated tensor
964
- ggml_backend_view_init(t);
971
+ status = ggml_backend_view_init(t);
965
972
  }
966
973
  }
974
+ if (status != GGML_STATUS_SUCCESS) {
975
+ GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
976
+ free_buffers(buffers, n_buffers);
977
+ return false;
978
+ }
967
979
  }
968
980
 
969
- *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
970
- (*buffers)[(*n_buffers)++] = buffer;
971
-
972
981
  return true;
973
982
  }
974
983
 
@@ -44,7 +44,7 @@ extern "C" {
44
44
  // base address of the buffer
45
45
  void * (*get_base) (ggml_backend_buffer_t buffer);
46
46
  // (optional) initialize a tensor in the buffer (eg. add tensor extras)
47
- void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
47
+ enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
48
48
  // tensor data access
49
49
  void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
50
50
  void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
@@ -2,14 +2,13 @@
2
2
  #include "ggml-backend.h"
3
3
  #include "ggml-impl.h"
4
4
  #include <algorithm>
5
- #include <codecvt>
6
5
  #include <cstring>
7
6
  #include <filesystem>
8
- #include <locale>
9
7
  #include <memory>
10
8
  #include <string>
11
9
  #include <type_traits>
12
10
  #include <vector>
11
+ #include <cctype>
13
12
 
14
13
  #ifdef _WIN32
15
14
  # define WIN32_LEAN_AND_MEAN
@@ -72,14 +71,22 @@
72
71
  # pragma clang diagnostic ignored "-Wdeprecated-declarations"
73
72
  #endif
74
73
 
75
- static std::wstring utf8_to_utf16(const std::string & str) {
76
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
77
- return converter.from_bytes(str);
78
- }
74
+ namespace fs = std::filesystem;
79
75
 
80
- static std::string utf16_to_utf8(const std::wstring & str) {
81
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
82
- return converter.to_bytes(str);
76
+ static std::string path_str(const fs::path & path) {
77
+ std::string u8path;
78
+ try {
79
+ #if defined(__cpp_lib_char8_t)
80
+ // C++20 and later: u8string() returns std::u8string
81
+ std::u8string u8str = path.u8string();
82
+ u8path = std::string(reinterpret_cast<const char*>(u8str.c_str()));
83
+ #else
84
+ // C++17: u8string() returns std::string
85
+ u8path = path.u8string();
86
+ #endif
87
+ } catch (...) {
88
+ }
89
+ return u8path;
83
90
  }
84
91
 
85
92
  #if defined(__clang__)
@@ -96,12 +103,12 @@ struct dl_handle_deleter {
96
103
  }
97
104
  };
98
105
 
99
- static dl_handle * dl_load_library(const std::wstring & path) {
106
+ static dl_handle * dl_load_library(const fs::path & path) {
100
107
  // suppress error dialogs for missing DLLs
101
108
  DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
102
109
  SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
103
110
 
104
- HMODULE handle = LoadLibraryW(path.c_str());
111
+ HMODULE handle = LoadLibraryW(path.wstring().c_str());
105
112
 
106
113
  SetErrorMode(old_mode);
107
114
 
@@ -129,8 +136,8 @@ struct dl_handle_deleter {
129
136
  }
130
137
  };
131
138
 
132
- static void * dl_load_library(const std::wstring & path) {
133
- dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
139
+ static void * dl_load_library(const fs::path & path) {
140
+ dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
134
141
 
135
142
  return handle;
136
143
  }
@@ -217,11 +224,11 @@ struct ggml_backend_registry {
217
224
  devices.push_back(device);
218
225
  }
219
226
 
220
- ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
227
+ ggml_backend_reg_t load_backend(const fs::path & path, bool silent) {
221
228
  dl_handle_ptr handle { dl_load_library(path) };
222
229
  if (!handle) {
223
230
  if (!silent) {
224
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
231
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(path).c_str());
225
232
  }
226
233
  return nullptr;
227
234
  }
@@ -229,7 +236,7 @@ struct ggml_backend_registry {
229
236
  auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
230
237
  if (score_fn && score_fn() == 0) {
231
238
  if (!silent) {
232
- GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
239
+ GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str());
233
240
  }
234
241
  return nullptr;
235
242
  }
@@ -237,7 +244,7 @@ struct ggml_backend_registry {
237
244
  auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
238
245
  if (!backend_init_fn) {
239
246
  if (!silent) {
240
- GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
247
+ GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path_str(path).c_str());
241
248
  }
242
249
  return nullptr;
243
250
  }
@@ -246,16 +253,17 @@ struct ggml_backend_registry {
246
253
  if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
247
254
  if (!silent) {
248
255
  if (!reg) {
249
- GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
256
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n",
257
+ __func__, path_str(path).c_str());
250
258
  } else {
251
259
  GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
252
- __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
260
+ __func__, path_str(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
253
261
  }
254
262
  }
255
263
  return nullptr;
256
264
  }
257
265
 
258
- GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
266
+ GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path_str(path).c_str());
259
267
 
260
268
  register_backend(reg, std::move(handle));
261
269
 
@@ -391,14 +399,14 @@ ggml_backend_t ggml_backend_init_best(void) {
391
399
 
392
400
  // Dynamic loading
393
401
  ggml_backend_reg_t ggml_backend_load(const char * path) {
394
- return get_reg().load_backend(utf8_to_utf16(path), false);
402
+ return get_reg().load_backend(path, false);
395
403
  }
396
404
 
397
405
  void ggml_backend_unload(ggml_backend_reg_t reg) {
398
406
  get_reg().unload_backend(reg, true);
399
407
  }
400
408
 
401
- static std::wstring get_executable_path() {
409
+ static fs::path get_executable_path() {
402
410
  #if defined(__APPLE__)
403
411
  // get executable path
404
412
  std::vector<char> path;
@@ -416,7 +424,7 @@ static std::wstring get_executable_path() {
416
424
  if (last_slash != std::string::npos) {
417
425
  base_path = base_path.substr(0, last_slash);
418
426
  }
419
- return utf8_to_utf16(base_path + "/");
427
+ return base_path + "/";
420
428
  #elif defined(__linux__) || defined(__FreeBSD__)
421
429
  std::string base_path = ".";
422
430
  std::vector<char> path(1024);
@@ -442,7 +450,7 @@ static std::wstring get_executable_path() {
442
450
  path.resize(path.size() * 2);
443
451
  }
444
452
 
445
- return utf8_to_utf16(base_path + "/");
453
+ return base_path + "/";
446
454
  #elif defined(_WIN32)
447
455
  std::vector<wchar_t> path(MAX_PATH);
448
456
  DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
@@ -461,74 +469,69 @@ static std::wstring get_executable_path() {
461
469
  #endif
462
470
  }
463
471
 
464
- static std::wstring backend_filename_prefix() {
465
- #ifdef _WIN32
466
- return L"ggml-";
467
- #else
468
- return L"libggml-";
469
- #endif
470
- }
471
-
472
- static std::wstring backend_filename_suffix() {
472
+ static fs::path backend_filename_prefix() {
473
473
  #ifdef _WIN32
474
- return L".dll";
474
+ return fs::u8path("ggml-");
475
475
  #else
476
- return L".so";
476
+ return fs::u8path("libggml-");
477
477
  #endif
478
478
  }
479
479
 
480
- static std::wstring path_separator() {
480
+ static fs::path backend_filename_extension() {
481
481
  #ifdef _WIN32
482
- return L"\\";
482
+ return fs::u8path(".dll");
483
483
  #else
484
- return L"/";
484
+ return fs::u8path(".so");
485
485
  #endif
486
486
  }
487
487
 
488
488
  static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
489
489
  // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
490
- // TODO: search system paths
491
- std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
492
- std::vector<std::wstring> search_paths;
490
+ const fs::path name_path = fs::u8path(name);
491
+ const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
492
+ const fs::path file_extension = backend_filename_extension();
493
+
494
+ std::vector<fs::path> search_paths;
493
495
  if (user_search_path == nullptr) {
494
- search_paths.push_back(L"." + path_separator());
496
+ // default search paths: executable directory, current directory
495
497
  search_paths.push_back(get_executable_path());
498
+ search_paths.push_back(fs::current_path());
496
499
  } else {
497
- search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
500
+ search_paths.push_back(fs::u8path(user_search_path));
498
501
  }
499
502
 
500
503
  int best_score = 0;
501
- std::wstring best_path;
504
+ fs::path best_path;
502
505
 
503
- namespace fs = std::filesystem;
504
506
  for (const auto & search_path : search_paths) {
505
507
  if (!fs::exists(search_path)) {
508
+ GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
506
509
  continue;
507
510
  }
508
511
  fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
509
512
  for (const auto & entry : dir_it) {
510
513
  if (entry.is_regular_file()) {
511
- std::wstring filename = entry.path().filename().wstring();
512
- std::wstring ext = entry.path().extension().wstring();
513
- if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
514
- dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
514
+ auto filename = entry.path().filename();
515
+ auto ext = entry.path().extension();
516
+ if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
517
+ dl_handle_ptr handle { dl_load_library(entry) };
515
518
  if (!handle && !silent) {
516
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
519
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(entry.path()).c_str());
517
520
  }
518
521
  if (handle) {
519
522
  auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
520
523
  if (score_fn) {
521
524
  int s = score_fn();
522
525
  #ifndef NDEBUG
523
- GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
526
+ GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s);
524
527
  #endif
525
528
  if (s > best_score) {
526
529
  best_score = s;
527
- best_path = entry.path().wstring();
530
+ best_path = entry.path();
528
531
  }
529
532
  } else {
530
533
  if (!silent) {
531
- GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
534
+ GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str());
532
535
  }
533
536
  }
534
537
  }
@@ -540,7 +543,8 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
540
543
  if (best_score == 0) {
541
544
  // try to load the base backend
542
545
  for (const auto & search_path : search_paths) {
543
- std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
546
+ fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
547
+ fs::path path = search_path / filename;
544
548
  if (fs::exists(path)) {
545
549
  return get_reg().load_backend(path, silent);
546
550
  }
@@ -21,6 +21,7 @@
21
21
  #include <string.h>
22
22
  #include <string>
23
23
  #include <vector>
24
+ #include <algorithm>
24
25
 
25
26
  #ifdef __APPLE__
26
27
  #include <sys/types.h>
@@ -126,11 +127,12 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
126
127
  return base;
127
128
  }
128
129
 
129
- void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
130
+ enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
130
131
  // init_tensor is optional
131
132
  if (buffer->iface.init_tensor) {
132
- buffer->iface.init_tensor(buffer, tensor);
133
+ return buffer->iface.init_tensor(buffer, tensor);
133
134
  }
135
+ return GGML_STATUS_SUCCESS;
134
136
  }
135
137
 
136
138
  void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
@@ -1641,7 +1643,7 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
1641
1643
 
1642
1644
  // utils
1643
1645
 
1644
- void ggml_backend_view_init(struct ggml_tensor * tensor) {
1646
+ enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
1645
1647
  GGML_ASSERT(tensor->buffer == NULL);
1646
1648
  GGML_ASSERT(tensor->view_src != NULL);
1647
1649
  GGML_ASSERT(tensor->view_src->buffer != NULL);
@@ -1649,10 +1651,10 @@ void ggml_backend_view_init(struct ggml_tensor * tensor) {
1649
1651
 
1650
1652
  tensor->buffer = tensor->view_src->buffer;
1651
1653
  tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
1652
- ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
1654
+ return ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
1653
1655
  }
1654
1656
 
1655
- void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
1657
+ enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
1656
1658
  GGML_ASSERT(tensor->buffer == NULL);
1657
1659
  GGML_ASSERT(tensor->data == NULL);
1658
1660
  GGML_ASSERT(tensor->view_src == NULL);
@@ -1662,7 +1664,7 @@ void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor
1662
1664
 
1663
1665
  tensor->buffer = buffer;
1664
1666
  tensor->data = addr;
1665
- ggml_backend_buffer_init_tensor(buffer, tensor);
1667
+ return ggml_backend_buffer_init_tensor(buffer, tensor);
1666
1668
  }
1667
1669
 
1668
1670
  static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
@@ -1708,7 +1710,8 @@ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_
1708
1710
  struct ggml_tensor * dst = node_copies[id];
1709
1711
  if (dst->view_src != NULL) {
1710
1712
  graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
1711
- ggml_backend_view_init(dst);
1713
+ enum ggml_status status = ggml_backend_view_init(dst);
1714
+ GGML_ASSERT(status == GGML_STATUS_SUCCESS);
1712
1715
  }
1713
1716
  else {
1714
1717
  ggml_backend_tensor_copy(src, dst);
@@ -1823,7 +1826,6 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
1823
1826
  assert(g1->n_nodes == g2->n_nodes);
1824
1827
 
1825
1828
  for (int i = 0; i < g1->n_nodes; i++) {
1826
- //printf("eval %d/%d\n", i, g1->n_nodes);
1827
1829
  struct ggml_tensor * t1 = g1->nodes[i];
1828
1830
  struct ggml_tensor * t2 = g2->nodes[i];
1829
1831
 
@@ -796,11 +796,11 @@ static bool need_transform(ggml_type type) {
796
796
  * @param buffer The CANN buffer from which to initialize the tensor.
797
797
  * @param tensor Pointer to the tensor to be initialized.
798
798
  */
799
- static void ggml_backend_cann_buffer_init_tensor(
799
+ static enum ggml_status ggml_backend_cann_buffer_init_tensor(
800
800
  ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
801
801
  if (tensor->view_src != NULL && tensor->view_offs == 0) {
802
802
  GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
803
- return;
803
+ return GGML_STATUS_SUCCESS;
804
804
  }
805
805
 
806
806
  // TODO: can backend doesn't support quantized yet. Just leave the code
@@ -817,6 +817,7 @@ static void ggml_backend_cann_buffer_init_tensor(
817
817
  memset_size, 0, memset_size));
818
818
  }
819
819
  }
820
+ return GGML_STATUS_SUCCESS;
820
821
  }
821
822
 
822
823
  // TODO: need handle tensor which has paddings.
@@ -1,7 +1,5 @@
1
1
  #include "kernel_operator.h"
2
2
 
3
- #include <cmath>
4
-
5
3
  using namespace AscendC;
6
4
 
7
5
  #define BUFFER_NUM 2
@@ -183,7 +181,7 @@ extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32(
183
181
  copy_to_ub(output_ne_gm, output_ne_ub, 32);
184
182
  copy_to_ub(output_nb_gm, output_nb_ub, 32);
185
183
 
186
- DupByRows<float_t, float_t> op;
184
+ DupByRows<float, float> op;
187
185
  op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
188
186
  op.dup();
189
187
  }
@@ -206,7 +204,7 @@ extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32_to_fp16(
206
204
  copy_to_ub(output_ne_gm, output_ne_ub, 32);
207
205
  copy_to_ub(output_nb_gm, output_nb_ub, 32);
208
206
 
209
- DupByRows<float_t, half> op;
207
+ DupByRows<float, half> op;
210
208
  op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
211
209
  op.dup_with_cast();
212
210
  }
@@ -230,7 +228,7 @@ extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp16_to_fp32(
230
228
  copy_to_ub(output_ne_gm, output_ne_ub, 32);
231
229
  copy_to_ub(output_nb_gm, output_nb_ub, 32);
232
230
 
233
- DupByRows<half, float_t> op;
231
+ DupByRows<half, float> op;
234
232
  op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
235
233
  op.dup_with_cast();
236
234
  }
@@ -473,7 +473,6 @@ GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
473
473
  240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
474
474
  GGML_TABLE_END()
475
475
 
476
- //#if __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A // lowest compute capability for integer intrinsics
477
476
  GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
478
477
  0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
479
478
  0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
@@ -508,7 +507,6 @@ GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
508
507
  0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
509
508
  0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
510
509
  GGML_TABLE_END()
511
- //#endif
512
510
 
513
511
 
514
512
  GGML_TABLE_BEGIN(uint64_t, iq2xxs_grid, 256)