@fugood/llama.node 0.3.12 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +2 -1
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +14 -0
- package/src/LlamaContext.cpp +110 -79
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +95 -13
- package/src/llama.cpp/.github/workflows/docker.yml +2 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +23 -6
- package/src/llama.cpp/common/arg.cpp +292 -14
- package/src/llama.cpp/common/chat.cpp +1128 -315
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +27 -171
- package/src/llama.cpp/common/common.h +41 -73
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/llguidance.cpp +3 -3
- package/src/llama.cpp/common/log.cpp +1 -0
- package/src/llama.cpp/common/log.h +2 -1
- package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +21 -7
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +61 -14
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +93 -49
- package/src/llama.cpp/common/speculative.cpp +6 -5
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +47 -9
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +4 -4
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
- package/src/llama.cpp/examples/main/main.cpp +73 -28
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +115 -79
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/server/httplib.h +381 -292
- package/src/llama.cpp/examples/server/server.cpp +134 -128
- package/src/llama.cpp/examples/server/utils.hpp +95 -106
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +251 -142
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +4 -1
- package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
- package/src/llama.cpp/ggml/include/ggml.h +6 -2
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +156 -11
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +2235 -641
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1572 -198
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +24 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +16 -3
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +246 -120
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +174 -728
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +949 -602
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +37 -3
- package/src/llama.cpp/ggml/src/ggml.c +9 -4
- package/src/llama.cpp/include/llama.h +32 -14
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +21 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +183 -183
- package/src/llama.cpp/src/llama-grammar.h +13 -4
- package/src/llama.cpp/src/llama-impl.h +6 -6
- package/src/llama.cpp/src/llama-kv-cache.h +2 -1
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-mmap.h +1 -0
- package/src/llama.cpp/src/llama-model.cpp +70 -6
- package/src/llama.cpp/src/llama-sampling.cpp +174 -67
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +154 -5
- package/src/llama.cpp/src/unicode.cpp +9 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +171 -115
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +691 -325
- package/src/llama.cpp/tests/test-gguf.cpp +4 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/tests/test-sampling.cpp +15 -0
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -52
|
@@ -226,6 +226,9 @@ add_library(ggml-base
|
|
|
226
226
|
gguf.cpp)
|
|
227
227
|
|
|
228
228
|
target_include_directories(ggml-base PRIVATE .)
|
|
229
|
+
if (GGML_BACKEND_DL)
|
|
230
|
+
target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
|
|
231
|
+
endif()
|
|
229
232
|
|
|
230
233
|
add_library(ggml
|
|
231
234
|
ggml-backend-reg.cpp)
|
|
@@ -233,7 +236,7 @@ add_library(ggml
|
|
|
233
236
|
target_link_libraries(ggml PUBLIC ggml-base)
|
|
234
237
|
|
|
235
238
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
236
|
-
target_link_libraries(ggml PRIVATE dl)
|
|
239
|
+
target_link_libraries(ggml PRIVATE dl stdc++fs)
|
|
237
240
|
endif()
|
|
238
241
|
|
|
239
242
|
function(ggml_add_backend_library backend)
|
|
@@ -286,7 +289,7 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|
|
286
289
|
set(GGML_CPU_TAG_NAME ${tag_name})
|
|
287
290
|
# other: OPENMP LLAMAFILE CPU_HBM
|
|
288
291
|
foreach (feat NATIVE
|
|
289
|
-
AVX AVX2 AVX_VNNI FMA F16C
|
|
292
|
+
AVX AVX2 BMI2 AVX_VNNI FMA F16C
|
|
290
293
|
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
|
|
291
294
|
AMX_TILE AMX_INT8 AMX_BF16)
|
|
292
295
|
set(GGML_${feat} OFF)
|
|
@@ -306,13 +309,13 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
306
309
|
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
|
|
307
310
|
endif()
|
|
308
311
|
ggml_add_cpu_backend_variant(sandybridge AVX)
|
|
309
|
-
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
|
|
310
|
-
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
|
311
|
-
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
|
312
|
-
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
|
312
|
+
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA)
|
|
313
|
+
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512)
|
|
314
|
+
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
|
315
|
+
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI)
|
|
313
316
|
if (NOT MSVC)
|
|
314
317
|
# MSVC doesn't support AMX
|
|
315
|
-
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
318
|
+
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
316
319
|
endif()
|
|
317
320
|
elseif (GGML_CPU)
|
|
318
321
|
ggml_add_cpu_backend_variant_impl("")
|
|
@@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
|
|
|
89
89
|
return talloc;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
|
|
92
|
+
enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
|
|
93
93
|
size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
|
|
94
94
|
size = GGML_PAD(size, talloc->alignment);
|
|
95
95
|
|
|
@@ -104,7 +104,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
|
|
|
104
104
|
|
|
105
105
|
assert(((uintptr_t)addr % talloc->alignment) == 0);
|
|
106
106
|
|
|
107
|
-
ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
|
|
107
|
+
return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
|
|
108
108
|
}
|
|
109
109
|
|
|
110
110
|
// dynamic tensor allocator
|
|
@@ -933,42 +933,51 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
|
|
|
933
933
|
|
|
934
934
|
// utils
|
|
935
935
|
|
|
936
|
+
static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
|
|
937
|
+
for (size_t i = 0; i < *n_buffers; i++) {
|
|
938
|
+
ggml_backend_buffer_free((*buffers)[i]);
|
|
939
|
+
}
|
|
940
|
+
free(*buffers);
|
|
941
|
+
}
|
|
942
|
+
|
|
936
943
|
static bool alloc_tensor_range(struct ggml_context * ctx,
|
|
937
944
|
struct ggml_tensor * first, struct ggml_tensor * last,
|
|
938
945
|
ggml_backend_buffer_type_t buft, size_t size,
|
|
939
946
|
ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
|
|
947
|
+
|
|
940
948
|
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
|
|
941
949
|
if (buffer == NULL) {
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
#endif
|
|
945
|
-
for (size_t i = 0; i < *n_buffers; i++) {
|
|
946
|
-
ggml_backend_buffer_free((*buffers)[i]);
|
|
947
|
-
}
|
|
948
|
-
free(*buffers);
|
|
950
|
+
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
|
|
951
|
+
free_buffers(buffers, n_buffers);
|
|
949
952
|
return false;
|
|
950
953
|
}
|
|
951
954
|
|
|
955
|
+
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
|
|
956
|
+
(*buffers)[(*n_buffers)++] = buffer;
|
|
957
|
+
|
|
952
958
|
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
|
|
953
959
|
|
|
954
960
|
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
|
|
961
|
+
enum ggml_status status = GGML_STATUS_SUCCESS;
|
|
955
962
|
if (t->data == NULL) {
|
|
956
963
|
if (t->view_src == NULL) {
|
|
957
|
-
ggml_tallocr_alloc(&tallocr, t);
|
|
964
|
+
status = ggml_tallocr_alloc(&tallocr, t);
|
|
958
965
|
} else if (t->buffer == NULL) {
|
|
959
|
-
ggml_backend_view_init(t);
|
|
966
|
+
status = ggml_backend_view_init(t);
|
|
960
967
|
}
|
|
961
968
|
} else {
|
|
962
969
|
if (t->view_src != NULL && t->buffer == NULL) {
|
|
963
970
|
// view of a pre-allocated tensor
|
|
964
|
-
ggml_backend_view_init(t);
|
|
971
|
+
status = ggml_backend_view_init(t);
|
|
965
972
|
}
|
|
966
973
|
}
|
|
974
|
+
if (status != GGML_STATUS_SUCCESS) {
|
|
975
|
+
GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
|
|
976
|
+
free_buffers(buffers, n_buffers);
|
|
977
|
+
return false;
|
|
978
|
+
}
|
|
967
979
|
}
|
|
968
980
|
|
|
969
|
-
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
|
|
970
|
-
(*buffers)[(*n_buffers)++] = buffer;
|
|
971
|
-
|
|
972
981
|
return true;
|
|
973
982
|
}
|
|
974
983
|
|
|
@@ -44,7 +44,7 @@ extern "C" {
|
|
|
44
44
|
// base address of the buffer
|
|
45
45
|
void * (*get_base) (ggml_backend_buffer_t buffer);
|
|
46
46
|
// (optional) initialize a tensor in the buffer (eg. add tensor extras)
|
|
47
|
-
|
|
47
|
+
enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
48
48
|
// tensor data access
|
|
49
49
|
void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
|
|
50
50
|
void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
|
@@ -2,14 +2,13 @@
|
|
|
2
2
|
#include "ggml-backend.h"
|
|
3
3
|
#include "ggml-impl.h"
|
|
4
4
|
#include <algorithm>
|
|
5
|
-
#include <codecvt>
|
|
6
5
|
#include <cstring>
|
|
7
6
|
#include <filesystem>
|
|
8
|
-
#include <locale>
|
|
9
7
|
#include <memory>
|
|
10
8
|
#include <string>
|
|
11
9
|
#include <type_traits>
|
|
12
10
|
#include <vector>
|
|
11
|
+
#include <cctype>
|
|
13
12
|
|
|
14
13
|
#ifdef _WIN32
|
|
15
14
|
# define WIN32_LEAN_AND_MEAN
|
|
@@ -72,14 +71,22 @@
|
|
|
72
71
|
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
|
73
72
|
#endif
|
|
74
73
|
|
|
75
|
-
|
|
76
|
-
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
|
77
|
-
return converter.from_bytes(str);
|
|
78
|
-
}
|
|
74
|
+
namespace fs = std::filesystem;
|
|
79
75
|
|
|
80
|
-
static std::string
|
|
81
|
-
std::
|
|
82
|
-
|
|
76
|
+
static std::string path_str(const fs::path & path) {
|
|
77
|
+
std::string u8path;
|
|
78
|
+
try {
|
|
79
|
+
#if defined(__cpp_lib_char8_t)
|
|
80
|
+
// C++20 and later: u8string() returns std::u8string
|
|
81
|
+
std::u8string u8str = path.u8string();
|
|
82
|
+
u8path = std::string(reinterpret_cast<const char*>(u8str.c_str()));
|
|
83
|
+
#else
|
|
84
|
+
// C++17: u8string() returns std::string
|
|
85
|
+
u8path = path.u8string();
|
|
86
|
+
#endif
|
|
87
|
+
} catch (...) {
|
|
88
|
+
}
|
|
89
|
+
return u8path;
|
|
83
90
|
}
|
|
84
91
|
|
|
85
92
|
#if defined(__clang__)
|
|
@@ -96,12 +103,12 @@ struct dl_handle_deleter {
|
|
|
96
103
|
}
|
|
97
104
|
};
|
|
98
105
|
|
|
99
|
-
static dl_handle * dl_load_library(const
|
|
106
|
+
static dl_handle * dl_load_library(const fs::path & path) {
|
|
100
107
|
// suppress error dialogs for missing DLLs
|
|
101
108
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
102
109
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
103
110
|
|
|
104
|
-
HMODULE handle = LoadLibraryW(path.c_str());
|
|
111
|
+
HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
|
105
112
|
|
|
106
113
|
SetErrorMode(old_mode);
|
|
107
114
|
|
|
@@ -129,8 +136,8 @@ struct dl_handle_deleter {
|
|
|
129
136
|
}
|
|
130
137
|
};
|
|
131
138
|
|
|
132
|
-
static void * dl_load_library(const
|
|
133
|
-
dl_handle * handle = dlopen(
|
|
139
|
+
static void * dl_load_library(const fs::path & path) {
|
|
140
|
+
dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
|
134
141
|
|
|
135
142
|
return handle;
|
|
136
143
|
}
|
|
@@ -217,11 +224,11 @@ struct ggml_backend_registry {
|
|
|
217
224
|
devices.push_back(device);
|
|
218
225
|
}
|
|
219
226
|
|
|
220
|
-
ggml_backend_reg_t load_backend(const
|
|
227
|
+
ggml_backend_reg_t load_backend(const fs::path & path, bool silent) {
|
|
221
228
|
dl_handle_ptr handle { dl_load_library(path) };
|
|
222
229
|
if (!handle) {
|
|
223
230
|
if (!silent) {
|
|
224
|
-
GGML_LOG_ERROR("%s: failed to load %s\n", __func__,
|
|
231
|
+
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(path).c_str());
|
|
225
232
|
}
|
|
226
233
|
return nullptr;
|
|
227
234
|
}
|
|
@@ -229,7 +236,7 @@ struct ggml_backend_registry {
|
|
|
229
236
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
|
230
237
|
if (score_fn && score_fn() == 0) {
|
|
231
238
|
if (!silent) {
|
|
232
|
-
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__,
|
|
239
|
+
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str());
|
|
233
240
|
}
|
|
234
241
|
return nullptr;
|
|
235
242
|
}
|
|
@@ -237,7 +244,7 @@ struct ggml_backend_registry {
|
|
|
237
244
|
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
|
238
245
|
if (!backend_init_fn) {
|
|
239
246
|
if (!silent) {
|
|
240
|
-
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__,
|
|
247
|
+
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path_str(path).c_str());
|
|
241
248
|
}
|
|
242
249
|
return nullptr;
|
|
243
250
|
}
|
|
@@ -246,16 +253,17 @@ struct ggml_backend_registry {
|
|
|
246
253
|
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
|
247
254
|
if (!silent) {
|
|
248
255
|
if (!reg) {
|
|
249
|
-
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n",
|
|
256
|
+
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n",
|
|
257
|
+
__func__, path_str(path).c_str());
|
|
250
258
|
} else {
|
|
251
259
|
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
|
252
|
-
__func__,
|
|
260
|
+
__func__, path_str(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
|
253
261
|
}
|
|
254
262
|
}
|
|
255
263
|
return nullptr;
|
|
256
264
|
}
|
|
257
265
|
|
|
258
|
-
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg),
|
|
266
|
+
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path_str(path).c_str());
|
|
259
267
|
|
|
260
268
|
register_backend(reg, std::move(handle));
|
|
261
269
|
|
|
@@ -391,14 +399,14 @@ ggml_backend_t ggml_backend_init_best(void) {
|
|
|
391
399
|
|
|
392
400
|
// Dynamic loading
|
|
393
401
|
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
|
394
|
-
return get_reg().load_backend(
|
|
402
|
+
return get_reg().load_backend(path, false);
|
|
395
403
|
}
|
|
396
404
|
|
|
397
405
|
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
|
398
406
|
get_reg().unload_backend(reg, true);
|
|
399
407
|
}
|
|
400
408
|
|
|
401
|
-
static std::wstring get_executable_path() {
|
|
409
|
+
static fs::path get_executable_path() {
|
|
402
410
|
#if defined(__APPLE__)
|
|
403
411
|
// get executable path
|
|
404
412
|
std::vector<char> path;
|
|
@@ -416,7 +424,7 @@ static std::wstring get_executable_path() {
|
|
|
416
424
|
if (last_slash != std::string::npos) {
|
|
417
425
|
base_path = base_path.substr(0, last_slash);
|
|
418
426
|
}
|
|
419
|
-
return
|
|
427
|
+
return base_path + "/";
|
|
420
428
|
#elif defined(__linux__) || defined(__FreeBSD__)
|
|
421
429
|
std::string base_path = ".";
|
|
422
430
|
std::vector<char> path(1024);
|
|
@@ -442,7 +450,7 @@ static std::wstring get_executable_path() {
|
|
|
442
450
|
path.resize(path.size() * 2);
|
|
443
451
|
}
|
|
444
452
|
|
|
445
|
-
return
|
|
453
|
+
return base_path + "/";
|
|
446
454
|
#elif defined(_WIN32)
|
|
447
455
|
std::vector<wchar_t> path(MAX_PATH);
|
|
448
456
|
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
|
|
@@ -461,74 +469,69 @@ static std::wstring get_executable_path() {
|
|
|
461
469
|
#endif
|
|
462
470
|
}
|
|
463
471
|
|
|
464
|
-
static
|
|
465
|
-
#ifdef _WIN32
|
|
466
|
-
return L"ggml-";
|
|
467
|
-
#else
|
|
468
|
-
return L"libggml-";
|
|
469
|
-
#endif
|
|
470
|
-
}
|
|
471
|
-
|
|
472
|
-
static std::wstring backend_filename_suffix() {
|
|
472
|
+
static fs::path backend_filename_prefix() {
|
|
473
473
|
#ifdef _WIN32
|
|
474
|
-
return
|
|
474
|
+
return fs::u8path("ggml-");
|
|
475
475
|
#else
|
|
476
|
-
return
|
|
476
|
+
return fs::u8path("libggml-");
|
|
477
477
|
#endif
|
|
478
478
|
}
|
|
479
479
|
|
|
480
|
-
static
|
|
480
|
+
static fs::path backend_filename_extension() {
|
|
481
481
|
#ifdef _WIN32
|
|
482
|
-
return
|
|
482
|
+
return fs::u8path(".dll");
|
|
483
483
|
#else
|
|
484
|
-
return
|
|
484
|
+
return fs::u8path(".so");
|
|
485
485
|
#endif
|
|
486
486
|
}
|
|
487
487
|
|
|
488
488
|
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
|
489
489
|
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
490
|
+
const fs::path name_path = fs::u8path(name);
|
|
491
|
+
const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
|
|
492
|
+
const fs::path file_extension = backend_filename_extension();
|
|
493
|
+
|
|
494
|
+
std::vector<fs::path> search_paths;
|
|
493
495
|
if (user_search_path == nullptr) {
|
|
494
|
-
|
|
496
|
+
// default search paths: executable directory, current directory
|
|
495
497
|
search_paths.push_back(get_executable_path());
|
|
498
|
+
search_paths.push_back(fs::current_path());
|
|
496
499
|
} else {
|
|
497
|
-
search_paths.push_back(
|
|
500
|
+
search_paths.push_back(fs::u8path(user_search_path));
|
|
498
501
|
}
|
|
499
502
|
|
|
500
503
|
int best_score = 0;
|
|
501
|
-
|
|
504
|
+
fs::path best_path;
|
|
502
505
|
|
|
503
|
-
namespace fs = std::filesystem;
|
|
504
506
|
for (const auto & search_path : search_paths) {
|
|
505
507
|
if (!fs::exists(search_path)) {
|
|
508
|
+
GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
|
|
506
509
|
continue;
|
|
507
510
|
}
|
|
508
511
|
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
|
|
509
512
|
for (const auto & entry : dir_it) {
|
|
510
513
|
if (entry.is_regular_file()) {
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
if (filename.find(file_prefix) == 0 && ext ==
|
|
514
|
-
dl_handle_ptr handle { dl_load_library(entry
|
|
514
|
+
auto filename = entry.path().filename();
|
|
515
|
+
auto ext = entry.path().extension();
|
|
516
|
+
if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
|
|
517
|
+
dl_handle_ptr handle { dl_load_library(entry) };
|
|
515
518
|
if (!handle && !silent) {
|
|
516
|
-
GGML_LOG_ERROR("%s: failed to load %s\n", __func__,
|
|
519
|
+
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(entry.path()).c_str());
|
|
517
520
|
}
|
|
518
521
|
if (handle) {
|
|
519
522
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
|
520
523
|
if (score_fn) {
|
|
521
524
|
int s = score_fn();
|
|
522
525
|
#ifndef NDEBUG
|
|
523
|
-
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__,
|
|
526
|
+
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s);
|
|
524
527
|
#endif
|
|
525
528
|
if (s > best_score) {
|
|
526
529
|
best_score = s;
|
|
527
|
-
best_path = entry.path()
|
|
530
|
+
best_path = entry.path();
|
|
528
531
|
}
|
|
529
532
|
} else {
|
|
530
533
|
if (!silent) {
|
|
531
|
-
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__,
|
|
534
|
+
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str());
|
|
532
535
|
}
|
|
533
536
|
}
|
|
534
537
|
}
|
|
@@ -540,7 +543,8 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
|
|
540
543
|
if (best_score == 0) {
|
|
541
544
|
// try to load the base backend
|
|
542
545
|
for (const auto & search_path : search_paths) {
|
|
543
|
-
|
|
546
|
+
fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
|
|
547
|
+
fs::path path = search_path / filename;
|
|
544
548
|
if (fs::exists(path)) {
|
|
545
549
|
return get_reg().load_backend(path, silent);
|
|
546
550
|
}
|
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
#include <string.h>
|
|
22
22
|
#include <string>
|
|
23
23
|
#include <vector>
|
|
24
|
+
#include <algorithm>
|
|
24
25
|
|
|
25
26
|
#ifdef __APPLE__
|
|
26
27
|
#include <sys/types.h>
|
|
@@ -126,11 +127,12 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
|
126
127
|
return base;
|
|
127
128
|
}
|
|
128
129
|
|
|
129
|
-
void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
|
130
|
+
enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
|
130
131
|
// init_tensor is optional
|
|
131
132
|
if (buffer->iface.init_tensor) {
|
|
132
|
-
buffer->iface.init_tensor(buffer, tensor);
|
|
133
|
+
return buffer->iface.init_tensor(buffer, tensor);
|
|
133
134
|
}
|
|
135
|
+
return GGML_STATUS_SUCCESS;
|
|
134
136
|
}
|
|
135
137
|
|
|
136
138
|
void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
|
@@ -1641,7 +1643,7 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
|
|
|
1641
1643
|
|
|
1642
1644
|
// utils
|
|
1643
1645
|
|
|
1644
|
-
void ggml_backend_view_init(struct ggml_tensor * tensor) {
|
|
1646
|
+
enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
|
|
1645
1647
|
GGML_ASSERT(tensor->buffer == NULL);
|
|
1646
1648
|
GGML_ASSERT(tensor->view_src != NULL);
|
|
1647
1649
|
GGML_ASSERT(tensor->view_src->buffer != NULL);
|
|
@@ -1649,10 +1651,10 @@ void ggml_backend_view_init(struct ggml_tensor * tensor) {
|
|
|
1649
1651
|
|
|
1650
1652
|
tensor->buffer = tensor->view_src->buffer;
|
|
1651
1653
|
tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
|
|
1652
|
-
ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
|
|
1654
|
+
return ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
|
|
1653
1655
|
}
|
|
1654
1656
|
|
|
1655
|
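-
void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {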
|
|
1657
|
+
enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
|
|
1656
1658
|
GGML_ASSERT(tensor->buffer == NULL);
|
|
1657
1659
|
GGML_ASSERT(tensor->data == NULL);
|
|
1658
1660
|
GGML_ASSERT(tensor->view_src == NULL);
|
|
@@ -1662,7 +1664,7 @@ void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor
|
|
|
1662
1664
|
|
|
1663
1665
|
tensor->buffer = buffer;
|
|
1664
1666
|
tensor->data = addr;
|
|
1665
|
-
ggml_backend_buffer_init_tensor(buffer, tensor);
|
|
1667
|
+
return ggml_backend_buffer_init_tensor(buffer, tensor);
|
|
1666
1668
|
}
|
|
1667
1669
|
|
|
1668
1670
|
static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
|
|
@@ -1708,7 +1710,8 @@ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_
|
|
|
1708
1710
|
struct ggml_tensor * dst = node_copies[id];
|
|
1709
1711
|
if (dst->view_src != NULL) {
|
|
1710
1712
|
graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
|
|
1711
|
-
ggml_backend_view_init(dst);
|
|
1713
|
+
enum ggml_status status = ggml_backend_view_init(dst);
|
|
1714
|
+
GGML_ASSERT(status == GGML_STATUS_SUCCESS);
|
|
1712
1715
|
}
|
|
1713
1716
|
else {
|
|
1714
1717
|
ggml_backend_tensor_copy(src, dst);
|
|
@@ -1823,7 +1826,6 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
|
|
|
1823
1826
|
assert(g1->n_nodes == g2->n_nodes);
|
|
1824
1827
|
|
|
1825
1828
|
for (int i = 0; i < g1->n_nodes; i++) {
|
|
1826
|
-
//printf("eval %d/%d\n", i, g1->n_nodes);
|
|
1827
1829
|
struct ggml_tensor * t1 = g1->nodes[i];
|
|
1828
1830
|
struct ggml_tensor * t2 = g2->nodes[i];
|
|
1829
1831
|
|
|
@@ -796,11 +796,11 @@ static bool need_transform(ggml_type type) {
|
|
|
796
796
|
* @param buffer The CANN buffer from which to initialize the tensor.
|
|
797
797
|
* @param tensor Pointer to the tensor to be initialized.
|
|
798
798
|
*/
|
|
799
|
-
static void ggml_backend_cann_buffer_init_tensor(
|
|
799
|
+
static enum ggml_status ggml_backend_cann_buffer_init_tensor(
|
|
800
800
|
ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
|
|
801
801
|
if (tensor->view_src != NULL && tensor->view_offs == 0) {
|
|
802
802
|
GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
|
|
803
|
-
return;
|
|
803
|
+
return GGML_STATUS_SUCCESS;
|
|
804
804
|
}
|
|
805
805
|
|
|
806
806
|
// TODO: can backend doesn't support quantized yet. Just leave the code
|
|
@@ -817,6 +817,7 @@ static void ggml_backend_cann_buffer_init_tensor(
|
|
|
817
817
|
memset_size, 0, memset_size));
|
|
818
818
|
}
|
|
819
819
|
}
|
|
820
|
+
return GGML_STATUS_SUCCESS;
|
|
820
821
|
}
|
|
821
822
|
|
|
822
823
|
// TODO: need handle tensor which has paddings.
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
#include "kernel_operator.h"
|
|
2
2
|
|
|
3
|
-
#include <cmath>
|
|
4
|
-
|
|
5
3
|
using namespace AscendC;
|
|
6
4
|
|
|
7
5
|
#define BUFFER_NUM 2
|
|
@@ -183,7 +181,7 @@ extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32(
|
|
|
183
181
|
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
|
184
182
|
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
|
185
183
|
|
|
186
|
-
DupByRows<
|
|
184
|
+
DupByRows<float, float> op;
|
|
187
185
|
op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
|
|
188
186
|
op.dup();
|
|
189
187
|
}
|
|
@@ -206,7 +204,7 @@ extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32_to_fp16(
|
|
|
206
204
|
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
|
207
205
|
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
|
208
206
|
|
|
209
|
-
DupByRows<
|
|
207
|
+
DupByRows<float, half> op;
|
|
210
208
|
op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
|
|
211
209
|
op.dup_with_cast();
|
|
212
210
|
}
|
|
@@ -230,7 +228,7 @@ extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp16_to_fp32(
|
|
|
230
228
|
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
|
231
229
|
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
|
232
230
|
|
|
233
|
-
DupByRows<half,
|
|
231
|
+
DupByRows<half, float> op;
|
|
234
232
|
op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
|
|
235
233
|
op.dup_with_cast();
|
|
236
234
|
}
|
|
@@ -473,7 +473,6 @@ GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
|
|
|
473
473
|
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
|
474
474
|
GGML_TABLE_END()
|
|
475
475
|
|
|
476
|
-
//#if __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A // lowest compute capability for integer intrinsics
|
|
477
476
|
GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
|
|
478
477
|
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
|
|
479
478
|
0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
|
|
@@ -508,7 +507,6 @@ GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
|
|
|
508
507
|
0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
|
|
509
508
|
0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
|
|
510
509
|
GGML_TABLE_END()
|
|
511
|
-
//#endif
|
|
512
510
|
|
|
513
511
|
|
|
514
512
|
GGML_TABLE_BEGIN(uint64_t, iq2xxs_grid, 256)
|