@fugood/llama.node 0.3.13 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +1 -1
- package/src/LlamaContext.cpp +98 -76
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +89 -10
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/CMakeLists.txt +9 -1
- package/src/llama.cpp/cmake/common.cmake +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +3 -3
- package/src/llama.cpp/common/arg.cpp +132 -13
- package/src/llama.cpp/common/chat.cpp +960 -266
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +33 -174
- package/src/llama.cpp/common/common.h +27 -67
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +45 -7
- package/src/llama.cpp/common/speculative.cpp +10 -9
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +45 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +4 -2
- package/src/llama.cpp/examples/embedding/embedding.cpp +2 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +3 -4
- package/src/llama.cpp/examples/infill/infill.cpp +2 -2
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +5 -5
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +7 -6
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/main.cpp +79 -34
- package/src/llama.cpp/examples/parallel/parallel.cpp +6 -5
- package/src/llama.cpp/examples/passkey/passkey.cpp +15 -14
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +196 -108
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
- package/src/llama.cpp/examples/server/server.cpp +113 -101
- package/src/llama.cpp/examples/server/utils.hpp +94 -105
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +263 -151
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -1
- package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
- package/src/llama.cpp/ggml/include/ggml.h +29 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +15 -34
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -7
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +139 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1546 -387
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1645 -113
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +242 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -6
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -138
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +117 -36
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +147 -16
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +307 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +262 -746
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -78
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +498 -188
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +16 -3
- package/src/llama.cpp/ggml/src/ggml.c +93 -5
- package/src/llama.cpp/include/llama.h +105 -27
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/CMakeLists.txt +5 -2
- package/src/llama.cpp/src/llama-adapter.cpp +19 -20
- package/src/llama.cpp/src/llama-adapter.h +11 -9
- package/src/llama.cpp/src/llama-arch.cpp +123 -16
- package/src/llama.cpp/src/llama-arch.h +19 -0
- package/src/llama.cpp/src/llama-batch.h +2 -2
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-context.cpp +2253 -1222
- package/src/llama.cpp/src/llama-context.h +214 -77
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +182 -182
- package/src/llama.cpp/src/llama-grammar.h +12 -3
- package/src/llama.cpp/src/llama-graph.cpp +1662 -0
- package/src/llama.cpp/src/llama-graph.h +574 -0
- package/src/llama.cpp/src/llama-hparams.cpp +8 -0
- package/src/llama.cpp/src/llama-hparams.h +9 -0
- package/src/llama.cpp/src/llama-io.cpp +15 -0
- package/src/llama.cpp/src/llama-io.h +35 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
- package/src/llama.cpp/src/llama-kv-cache.h +178 -109
- package/src/llama.cpp/src/llama-memory.cpp +1 -0
- package/src/llama.cpp/src/llama-memory.h +21 -0
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-model.cpp +8230 -122
- package/src/llama.cpp/src/llama-model.h +34 -1
- package/src/llama.cpp/src/llama-quant.cpp +10 -1
- package/src/llama.cpp/src/llama-sampling.cpp +43 -10
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +51 -9837
- package/src/llama.cpp/tests/test-backend-ops.cpp +247 -112
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +593 -395
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -55
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
- package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
|
@@ -56,7 +56,7 @@ extern "C" {
|
|
|
56
56
|
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
|
57
57
|
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
|
58
58
|
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
|
59
|
-
GGML_API
|
|
59
|
+
GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
60
60
|
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
|
61
61
|
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
|
62
62
|
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
@@ -342,8 +342,8 @@ extern "C" {
|
|
|
342
342
|
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
|
343
343
|
|
|
344
344
|
// Tensor initialization
|
|
345
|
-
GGML_API
|
|
346
|
-
GGML_API
|
|
345
|
+
GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
|
346
|
+
GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
|
|
347
347
|
|
|
348
348
|
// CPU buffer types are always available
|
|
349
349
|
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
|
@@ -80,6 +80,7 @@ extern "C" {
|
|
|
80
80
|
GGML_BACKEND_API int ggml_cpu_has_avx (void);
|
|
81
81
|
GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
|
|
82
82
|
GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
|
|
83
|
+
GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
|
|
83
84
|
GGML_BACKEND_API int ggml_cpu_has_f16c (void);
|
|
84
85
|
GGML_BACKEND_API int ggml_cpu_has_fma (void);
|
|
85
86
|
GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
|
|
@@ -95,9 +96,11 @@ extern "C" {
|
|
|
95
96
|
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
|
|
96
97
|
GGML_BACKEND_API int ggml_cpu_has_sve (void);
|
|
97
98
|
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
|
|
99
|
+
GGML_BACKEND_API int ggml_cpu_has_sme (void);
|
|
98
100
|
// other
|
|
99
101
|
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
|
100
102
|
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
|
103
|
+
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
|
|
101
104
|
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
|
102
105
|
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
|
|
103
106
|
|
|
@@ -454,6 +454,7 @@ extern "C" {
|
|
|
454
454
|
GGML_OP_RMS_NORM,
|
|
455
455
|
GGML_OP_RMS_NORM_BACK,
|
|
456
456
|
GGML_OP_GROUP_NORM,
|
|
457
|
+
GGML_OP_L2_NORM,
|
|
457
458
|
|
|
458
459
|
GGML_OP_MUL_MAT,
|
|
459
460
|
GGML_OP_MUL_MAT_ID,
|
|
@@ -502,6 +503,7 @@ extern "C" {
|
|
|
502
503
|
GGML_OP_ADD_REL_POS,
|
|
503
504
|
GGML_OP_RWKV_WKV6,
|
|
504
505
|
GGML_OP_GATED_LINEAR_ATTN,
|
|
506
|
+
GGML_OP_RWKV_WKV7,
|
|
505
507
|
|
|
506
508
|
GGML_OP_UNARY,
|
|
507
509
|
|
|
@@ -1095,6 +1097,18 @@ extern "C" {
|
|
|
1095
1097
|
int n_groups,
|
|
1096
1098
|
float eps);
|
|
1097
1099
|
|
|
1100
|
+
// l2 normalize along rows
|
|
1101
|
+
// used in rwkv v7
|
|
1102
|
+
GGML_API struct ggml_tensor * ggml_l2_norm(
|
|
1103
|
+
struct ggml_context * ctx,
|
|
1104
|
+
struct ggml_tensor * a,
|
|
1105
|
+
float eps);
|
|
1106
|
+
|
|
1107
|
+
GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
|
|
1108
|
+
struct ggml_context * ctx,
|
|
1109
|
+
struct ggml_tensor * a,
|
|
1110
|
+
float eps);
|
|
1111
|
+
|
|
1098
1112
|
// a - x
|
|
1099
1113
|
// b - dy
|
|
1100
1114
|
GGML_API struct ggml_tensor * ggml_rms_norm_back(
|
|
@@ -1890,6 +1904,16 @@ extern "C" {
|
|
|
1890
1904
|
struct ggml_tensor * state,
|
|
1891
1905
|
float scale);
|
|
1892
1906
|
|
|
1907
|
+
GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
|
|
1908
|
+
struct ggml_context * ctx,
|
|
1909
|
+
struct ggml_tensor * r,
|
|
1910
|
+
struct ggml_tensor * w,
|
|
1911
|
+
struct ggml_tensor * k,
|
|
1912
|
+
struct ggml_tensor * v,
|
|
1913
|
+
struct ggml_tensor * a,
|
|
1914
|
+
struct ggml_tensor * b,
|
|
1915
|
+
struct ggml_tensor * state);
|
|
1916
|
+
|
|
1893
1917
|
// custom operators
|
|
1894
1918
|
|
|
1895
1919
|
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
|
@@ -2140,7 +2164,11 @@ extern "C" {
|
|
|
2140
2164
|
# define GGML_RESTRICT
|
|
2141
2165
|
# endif
|
|
2142
2166
|
#else
|
|
2143
|
-
#
|
|
2167
|
+
# if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
|
|
2168
|
+
# define GGML_RESTRICT __restrict
|
|
2169
|
+
# else
|
|
2170
|
+
# define GGML_RESTRICT restrict
|
|
2171
|
+
# endif
|
|
2144
2172
|
#endif
|
|
2145
2173
|
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
|
2146
2174
|
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
include(CheckCXXCompilerFlag)
|
|
2
|
+
include("../cmake/common.cmake")
|
|
2
3
|
|
|
3
4
|
add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
|
|
4
5
|
|
|
@@ -24,33 +25,6 @@ if (NOT MSVC)
|
|
|
24
25
|
endif()
|
|
25
26
|
endif()
|
|
26
27
|
|
|
27
|
-
function(ggml_get_flags CCID CCVER)
|
|
28
|
-
set(C_FLAGS "")
|
|
29
|
-
set(CXX_FLAGS "")
|
|
30
|
-
|
|
31
|
-
if (CCID MATCHES "Clang")
|
|
32
|
-
set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
|
|
33
|
-
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
|
|
34
|
-
|
|
35
|
-
if (
|
|
36
|
-
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
|
|
37
|
-
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
|
|
38
|
-
)
|
|
39
|
-
list(APPEND C_FLAGS -Wdouble-promotion)
|
|
40
|
-
endif()
|
|
41
|
-
elseif (CCID STREQUAL "GNU")
|
|
42
|
-
set(C_FLAGS -Wdouble-promotion)
|
|
43
|
-
set(CXX_FLAGS -Wno-array-bounds)
|
|
44
|
-
|
|
45
|
-
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
|
|
46
|
-
list(APPEND CXX_FLAGS -Wextra-semi)
|
|
47
|
-
endif()
|
|
48
|
-
endif()
|
|
49
|
-
|
|
50
|
-
set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
|
|
51
|
-
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
|
|
52
|
-
endfunction()
|
|
53
|
-
|
|
54
28
|
if (GGML_FATAL_WARNINGS)
|
|
55
29
|
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
|
56
30
|
list(APPEND C_FLAGS -Werror)
|
|
@@ -226,6 +200,9 @@ add_library(ggml-base
|
|
|
226
200
|
gguf.cpp)
|
|
227
201
|
|
|
228
202
|
target_include_directories(ggml-base PRIVATE .)
|
|
203
|
+
if (GGML_BACKEND_DL)
|
|
204
|
+
target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
|
|
205
|
+
endif()
|
|
229
206
|
|
|
230
207
|
add_library(ggml
|
|
231
208
|
ggml-backend-reg.cpp)
|
|
@@ -233,7 +210,7 @@ add_library(ggml
|
|
|
233
210
|
target_link_libraries(ggml PUBLIC ggml-base)
|
|
234
211
|
|
|
235
212
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
236
|
-
target_link_libraries(ggml PRIVATE dl)
|
|
213
|
+
target_link_libraries(ggml PRIVATE dl stdc++fs)
|
|
237
214
|
endif()
|
|
238
215
|
|
|
239
216
|
function(ggml_add_backend_library backend)
|
|
@@ -286,7 +263,7 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|
|
286
263
|
set(GGML_CPU_TAG_NAME ${tag_name})
|
|
287
264
|
# other: OPENMP LLAMAFILE CPU_HBM
|
|
288
265
|
foreach (feat NATIVE
|
|
289
|
-
AVX AVX2 AVX_VNNI FMA F16C
|
|
266
|
+
AVX AVX2 BMI2 AVX_VNNI FMA F16C
|
|
290
267
|
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
|
|
291
268
|
AMX_TILE AMX_INT8 AMX_BF16)
|
|
292
269
|
set(GGML_${feat} OFF)
|
|
@@ -306,13 +283,13 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
306
283
|
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
|
|
307
284
|
endif()
|
|
308
285
|
ggml_add_cpu_backend_variant(sandybridge AVX)
|
|
309
|
-
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
|
|
310
|
-
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
|
311
|
-
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
|
312
|
-
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
|
286
|
+
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA)
|
|
287
|
+
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512)
|
|
288
|
+
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
|
289
|
+
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI)
|
|
313
290
|
if (NOT MSVC)
|
|
314
291
|
# MSVC doesn't support AMX
|
|
315
|
-
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
292
|
+
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
316
293
|
endif()
|
|
317
294
|
elseif (GGML_CPU)
|
|
318
295
|
ggml_add_cpu_backend_variant_impl("")
|
|
@@ -348,6 +325,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Android")
|
|
|
348
325
|
target_link_libraries(ggml-base PRIVATE dl)
|
|
349
326
|
endif()
|
|
350
327
|
|
|
328
|
+
if(CMAKE_SYSTEM_NAME MATCHES "visionOS")
|
|
329
|
+
target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE)
|
|
330
|
+
endif()
|
|
331
|
+
|
|
351
332
|
if (BUILD_SHARED_LIBS)
|
|
352
333
|
foreach (target ggml-base ggml)
|
|
353
334
|
set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
@@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
|
|
|
89
89
|
return talloc;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
|
|
92
|
+
enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
|
|
93
93
|
size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
|
|
94
94
|
size = GGML_PAD(size, talloc->alignment);
|
|
95
95
|
|
|
@@ -104,7 +104,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
|
|
|
104
104
|
|
|
105
105
|
assert(((uintptr_t)addr % talloc->alignment) == 0);
|
|
106
106
|
|
|
107
|
-
ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
|
|
107
|
+
return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
|
|
108
108
|
}
|
|
109
109
|
|
|
110
110
|
// dynamic tensor allocator
|
|
@@ -933,42 +933,51 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
|
|
|
933
933
|
|
|
934
934
|
// utils
|
|
935
935
|
|
|
936
|
+
static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
|
|
937
|
+
for (size_t i = 0; i < *n_buffers; i++) {
|
|
938
|
+
ggml_backend_buffer_free((*buffers)[i]);
|
|
939
|
+
}
|
|
940
|
+
free(*buffers);
|
|
941
|
+
}
|
|
942
|
+
|
|
936
943
|
static bool alloc_tensor_range(struct ggml_context * ctx,
|
|
937
944
|
struct ggml_tensor * first, struct ggml_tensor * last,
|
|
938
945
|
ggml_backend_buffer_type_t buft, size_t size,
|
|
939
946
|
ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
|
|
947
|
+
|
|
940
948
|
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
|
|
941
949
|
if (buffer == NULL) {
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
#endif
|
|
945
|
-
for (size_t i = 0; i < *n_buffers; i++) {
|
|
946
|
-
ggml_backend_buffer_free((*buffers)[i]);
|
|
947
|
-
}
|
|
948
|
-
free(*buffers);
|
|
950
|
+
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
|
|
951
|
+
free_buffers(buffers, n_buffers);
|
|
949
952
|
return false;
|
|
950
953
|
}
|
|
951
954
|
|
|
955
|
+
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
|
|
956
|
+
(*buffers)[(*n_buffers)++] = buffer;
|
|
957
|
+
|
|
952
958
|
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
|
|
953
959
|
|
|
954
960
|
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
|
|
961
|
+
enum ggml_status status = GGML_STATUS_SUCCESS;
|
|
955
962
|
if (t->data == NULL) {
|
|
956
963
|
if (t->view_src == NULL) {
|
|
957
|
-
ggml_tallocr_alloc(&tallocr, t);
|
|
964
|
+
status = ggml_tallocr_alloc(&tallocr, t);
|
|
958
965
|
} else if (t->buffer == NULL) {
|
|
959
|
-
ggml_backend_view_init(t);
|
|
966
|
+
status = ggml_backend_view_init(t);
|
|
960
967
|
}
|
|
961
968
|
} else {
|
|
962
969
|
if (t->view_src != NULL && t->buffer == NULL) {
|
|
963
970
|
// view of a pre-allocated tensor
|
|
964
|
-
ggml_backend_view_init(t);
|
|
971
|
+
status = ggml_backend_view_init(t);
|
|
965
972
|
}
|
|
966
973
|
}
|
|
974
|
+
if (status != GGML_STATUS_SUCCESS) {
|
|
975
|
+
GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
|
|
976
|
+
free_buffers(buffers, n_buffers);
|
|
977
|
+
return false;
|
|
978
|
+
}
|
|
967
979
|
}
|
|
968
980
|
|
|
969
|
-
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
|
|
970
|
-
(*buffers)[(*n_buffers)++] = buffer;
|
|
971
|
-
|
|
972
981
|
return true;
|
|
973
982
|
}
|
|
974
983
|
|
|
@@ -44,7 +44,7 @@ extern "C" {
|
|
|
44
44
|
// base address of the buffer
|
|
45
45
|
void * (*get_base) (ggml_backend_buffer_t buffer);
|
|
46
46
|
// (optional) initialize a tensor in the buffer (eg. add tensor extras)
|
|
47
|
-
|
|
47
|
+
enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
48
48
|
// tensor data access
|
|
49
49
|
void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
|
|
50
50
|
void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
|
@@ -2,14 +2,13 @@
|
|
|
2
2
|
#include "ggml-backend.h"
|
|
3
3
|
#include "ggml-impl.h"
|
|
4
4
|
#include <algorithm>
|
|
5
|
-
#include <codecvt>
|
|
6
5
|
#include <cstring>
|
|
7
6
|
#include <filesystem>
|
|
8
|
-
#include <locale>
|
|
9
7
|
#include <memory>
|
|
10
8
|
#include <string>
|
|
11
9
|
#include <type_traits>
|
|
12
10
|
#include <vector>
|
|
11
|
+
#include <cctype>
|
|
13
12
|
|
|
14
13
|
#ifdef _WIN32
|
|
15
14
|
# define WIN32_LEAN_AND_MEAN
|
|
@@ -72,14 +71,22 @@
|
|
|
72
71
|
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
|
73
72
|
#endif
|
|
74
73
|
|
|
75
|
-
|
|
76
|
-
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
|
77
|
-
return converter.from_bytes(str);
|
|
78
|
-
}
|
|
74
|
+
namespace fs = std::filesystem;
|
|
79
75
|
|
|
80
|
-
static std::string
|
|
81
|
-
std::
|
|
82
|
-
|
|
76
|
+
static std::string path_str(const fs::path & path) {
|
|
77
|
+
std::string u8path;
|
|
78
|
+
try {
|
|
79
|
+
#if defined(__cpp_lib_char8_t)
|
|
80
|
+
// C++20 and later: u8string() returns std::u8string
|
|
81
|
+
std::u8string u8str = path.u8string();
|
|
82
|
+
u8path = std::string(reinterpret_cast<const char*>(u8str.c_str()));
|
|
83
|
+
#else
|
|
84
|
+
// C++17: u8string() returns std::string
|
|
85
|
+
u8path = path.u8string();
|
|
86
|
+
#endif
|
|
87
|
+
} catch (...) {
|
|
88
|
+
}
|
|
89
|
+
return u8path;
|
|
83
90
|
}
|
|
84
91
|
|
|
85
92
|
#if defined(__clang__)
|
|
@@ -96,12 +103,12 @@ struct dl_handle_deleter {
|
|
|
96
103
|
}
|
|
97
104
|
};
|
|
98
105
|
|
|
99
|
-
static dl_handle * dl_load_library(const
|
|
106
|
+
static dl_handle * dl_load_library(const fs::path & path) {
|
|
100
107
|
// suppress error dialogs for missing DLLs
|
|
101
108
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
102
109
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
103
110
|
|
|
104
|
-
HMODULE handle = LoadLibraryW(path.c_str());
|
|
111
|
+
HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
|
105
112
|
|
|
106
113
|
SetErrorMode(old_mode);
|
|
107
114
|
|
|
@@ -129,8 +136,8 @@ struct dl_handle_deleter {
|
|
|
129
136
|
}
|
|
130
137
|
};
|
|
131
138
|
|
|
132
|
-
static void * dl_load_library(const
|
|
133
|
-
dl_handle * handle = dlopen(
|
|
139
|
+
static void * dl_load_library(const fs::path & path) {
|
|
140
|
+
dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
|
134
141
|
|
|
135
142
|
return handle;
|
|
136
143
|
}
|
|
@@ -217,11 +224,11 @@ struct ggml_backend_registry {
|
|
|
217
224
|
devices.push_back(device);
|
|
218
225
|
}
|
|
219
226
|
|
|
220
|
-
ggml_backend_reg_t load_backend(const
|
|
227
|
+
ggml_backend_reg_t load_backend(const fs::path & path, bool silent) {
|
|
221
228
|
dl_handle_ptr handle { dl_load_library(path) };
|
|
222
229
|
if (!handle) {
|
|
223
230
|
if (!silent) {
|
|
224
|
-
GGML_LOG_ERROR("%s: failed to load %s\n", __func__,
|
|
231
|
+
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(path).c_str());
|
|
225
232
|
}
|
|
226
233
|
return nullptr;
|
|
227
234
|
}
|
|
@@ -229,7 +236,7 @@ struct ggml_backend_registry {
|
|
|
229
236
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
|
230
237
|
if (score_fn && score_fn() == 0) {
|
|
231
238
|
if (!silent) {
|
|
232
|
-
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__,
|
|
239
|
+
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str());
|
|
233
240
|
}
|
|
234
241
|
return nullptr;
|
|
235
242
|
}
|
|
@@ -237,7 +244,7 @@ struct ggml_backend_registry {
|
|
|
237
244
|
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
|
238
245
|
if (!backend_init_fn) {
|
|
239
246
|
if (!silent) {
|
|
240
|
-
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__,
|
|
247
|
+
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path_str(path).c_str());
|
|
241
248
|
}
|
|
242
249
|
return nullptr;
|
|
243
250
|
}
|
|
@@ -246,16 +253,17 @@ struct ggml_backend_registry {
|
|
|
246
253
|
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
|
247
254
|
if (!silent) {
|
|
248
255
|
if (!reg) {
|
|
249
|
-
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n",
|
|
256
|
+
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n",
|
|
257
|
+
__func__, path_str(path).c_str());
|
|
250
258
|
} else {
|
|
251
259
|
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
|
252
|
-
__func__,
|
|
260
|
+
__func__, path_str(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
|
253
261
|
}
|
|
254
262
|
}
|
|
255
263
|
return nullptr;
|
|
256
264
|
}
|
|
257
265
|
|
|
258
|
-
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg),
|
|
266
|
+
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path_str(path).c_str());
|
|
259
267
|
|
|
260
268
|
register_backend(reg, std::move(handle));
|
|
261
269
|
|
|
@@ -391,14 +399,14 @@ ggml_backend_t ggml_backend_init_best(void) {
|
|
|
391
399
|
|
|
392
400
|
// Dynamic loading
|
|
393
401
|
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
|
394
|
-
return get_reg().load_backend(
|
|
402
|
+
return get_reg().load_backend(path, false);
|
|
395
403
|
}
|
|
396
404
|
|
|
397
405
|
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
|
398
406
|
get_reg().unload_backend(reg, true);
|
|
399
407
|
}
|
|
400
408
|
|
|
401
|
-
static
|
|
409
|
+
static fs::path get_executable_path() {
|
|
402
410
|
#if defined(__APPLE__)
|
|
403
411
|
// get executable path
|
|
404
412
|
std::vector<char> path;
|
|
@@ -416,7 +424,7 @@ static std::wstring get_executable_path() {
|
|
|
416
424
|
if (last_slash != std::string::npos) {
|
|
417
425
|
base_path = base_path.substr(0, last_slash);
|
|
418
426
|
}
|
|
419
|
-
return
|
|
427
|
+
return base_path + "/";
|
|
420
428
|
#elif defined(__linux__) || defined(__FreeBSD__)
|
|
421
429
|
std::string base_path = ".";
|
|
422
430
|
std::vector<char> path(1024);
|
|
@@ -442,7 +450,7 @@ static std::wstring get_executable_path() {
|
|
|
442
450
|
path.resize(path.size() * 2);
|
|
443
451
|
}
|
|
444
452
|
|
|
445
|
-
return
|
|
453
|
+
return base_path + "/";
|
|
446
454
|
#elif defined(_WIN32)
|
|
447
455
|
std::vector<wchar_t> path(MAX_PATH);
|
|
448
456
|
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
|
|
@@ -461,74 +469,69 @@ static std::wstring get_executable_path() {
|
|
|
461
469
|
#endif
|
|
462
470
|
}
|
|
463
471
|
|
|
464
|
-
static
|
|
465
|
-
#ifdef _WIN32
|
|
466
|
-
return L"ggml-";
|
|
467
|
-
#else
|
|
468
|
-
return L"libggml-";
|
|
469
|
-
#endif
|
|
470
|
-
}
|
|
471
|
-
|
|
472
|
-
static std::wstring backend_filename_suffix() {
|
|
472
|
+
static fs::path backend_filename_prefix() {
|
|
473
473
|
#ifdef _WIN32
|
|
474
|
-
return
|
|
474
|
+
return fs::u8path("ggml-");
|
|
475
475
|
#else
|
|
476
|
-
return
|
|
476
|
+
return fs::u8path("libggml-");
|
|
477
477
|
#endif
|
|
478
478
|
}
|
|
479
479
|
|
|
480
|
-
static
|
|
480
|
+
static fs::path backend_filename_extension() {
|
|
481
481
|
#ifdef _WIN32
|
|
482
|
-
return
|
|
482
|
+
return fs::u8path(".dll");
|
|
483
483
|
#else
|
|
484
|
-
return
|
|
484
|
+
return fs::u8path(".so");
|
|
485
485
|
#endif
|
|
486
486
|
}
|
|
487
487
|
|
|
488
488
|
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
|
489
489
|
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
490
|
+
const fs::path name_path = fs::u8path(name);
|
|
491
|
+
const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
|
|
492
|
+
const fs::path file_extension = backend_filename_extension();
|
|
493
|
+
|
|
494
|
+
std::vector<fs::path> search_paths;
|
|
493
495
|
if (user_search_path == nullptr) {
|
|
494
|
-
|
|
496
|
+
// default search paths: executable directory, current directory
|
|
495
497
|
search_paths.push_back(get_executable_path());
|
|
498
|
+
search_paths.push_back(fs::current_path());
|
|
496
499
|
} else {
|
|
497
|
-
search_paths.push_back(
|
|
500
|
+
search_paths.push_back(fs::u8path(user_search_path));
|
|
498
501
|
}
|
|
499
502
|
|
|
500
503
|
int best_score = 0;
|
|
501
|
-
|
|
504
|
+
fs::path best_path;
|
|
502
505
|
|
|
503
|
-
namespace fs = std::filesystem;
|
|
504
506
|
for (const auto & search_path : search_paths) {
|
|
505
507
|
if (!fs::exists(search_path)) {
|
|
508
|
+
GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
|
|
506
509
|
continue;
|
|
507
510
|
}
|
|
508
511
|
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
|
|
509
512
|
for (const auto & entry : dir_it) {
|
|
510
513
|
if (entry.is_regular_file()) {
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
if (filename.find(file_prefix) == 0 && ext ==
|
|
514
|
-
dl_handle_ptr handle { dl_load_library(entry
|
|
514
|
+
auto filename = entry.path().filename();
|
|
515
|
+
auto ext = entry.path().extension();
|
|
516
|
+
if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
|
|
517
|
+
dl_handle_ptr handle { dl_load_library(entry) };
|
|
515
518
|
if (!handle && !silent) {
|
|
516
|
-
GGML_LOG_ERROR("%s: failed to load %s\n", __func__,
|
|
519
|
+
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(entry.path()).c_str());
|
|
517
520
|
}
|
|
518
521
|
if (handle) {
|
|
519
522
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
|
520
523
|
if (score_fn) {
|
|
521
524
|
int s = score_fn();
|
|
522
525
|
#ifndef NDEBUG
|
|
523
|
-
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__,
|
|
526
|
+
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s);
|
|
524
527
|
#endif
|
|
525
528
|
if (s > best_score) {
|
|
526
529
|
best_score = s;
|
|
527
|
-
best_path = entry.path()
|
|
530
|
+
best_path = entry.path();
|
|
528
531
|
}
|
|
529
532
|
} else {
|
|
530
533
|
if (!silent) {
|
|
531
|
-
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__,
|
|
534
|
+
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str());
|
|
532
535
|
}
|
|
533
536
|
}
|
|
534
537
|
}
|
|
@@ -540,7 +543,8 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
|
|
540
543
|
if (best_score == 0) {
|
|
541
544
|
// try to load the base backend
|
|
542
545
|
for (const auto & search_path : search_paths) {
|
|
543
|
-
|
|
546
|
+
fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
|
|
547
|
+
fs::path path = search_path / filename;
|
|
544
548
|
if (fs::exists(path)) {
|
|
545
549
|
return get_reg().load_backend(path, silent);
|
|
546
550
|
}
|
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
#include <string.h>
|
|
22
22
|
#include <string>
|
|
23
23
|
#include <vector>
|
|
24
|
+
#include <algorithm>
|
|
24
25
|
|
|
25
26
|
#ifdef __APPLE__
|
|
26
27
|
#include <sys/types.h>
|
|
@@ -126,11 +127,12 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
|
126
127
|
return base;
|
|
127
128
|
}
|
|
128
129
|
|
|
129
|
-
|
|
130
|
+
enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
|
130
131
|
// init_tensor is optional
|
|
131
132
|
if (buffer->iface.init_tensor) {
|
|
132
|
-
buffer->iface.init_tensor(buffer, tensor);
|
|
133
|
+
return buffer->iface.init_tensor(buffer, tensor);
|
|
133
134
|
}
|
|
135
|
+
return GGML_STATUS_SUCCESS;
|
|
134
136
|
}
|
|
135
137
|
|
|
136
138
|
void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
|
@@ -1641,7 +1643,7 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
|
|
|
1641
1643
|
|
|
1642
1644
|
// utils
|
|
1643
1645
|
|
|
1644
|
-
|
|
1646
|
+
enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
|
|
1645
1647
|
GGML_ASSERT(tensor->buffer == NULL);
|
|
1646
1648
|
GGML_ASSERT(tensor->view_src != NULL);
|
|
1647
1649
|
GGML_ASSERT(tensor->view_src->buffer != NULL);
|
|
@@ -1649,10 +1651,10 @@ void ggml_backend_view_init(struct ggml_tensor * tensor) {
|
|
|
1649
1651
|
|
|
1650
1652
|
tensor->buffer = tensor->view_src->buffer;
|
|
1651
1653
|
tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
|
|
1652
|
-
ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
|
|
1654
|
+
return ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
|
|
1653
1655
|
}
|
|
1654
1656
|
|
|
1655
|
-
|
|
1657
|
+
enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
|
|
1656
1658
|
GGML_ASSERT(tensor->buffer == NULL);
|
|
1657
1659
|
GGML_ASSERT(tensor->data == NULL);
|
|
1658
1660
|
GGML_ASSERT(tensor->view_src == NULL);
|
|
@@ -1662,7 +1664,7 @@ void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor
|
|
|
1662
1664
|
|
|
1663
1665
|
tensor->buffer = buffer;
|
|
1664
1666
|
tensor->data = addr;
|
|
1665
|
-
ggml_backend_buffer_init_tensor(buffer, tensor);
|
|
1667
|
+
return ggml_backend_buffer_init_tensor(buffer, tensor);
|
|
1666
1668
|
}
|
|
1667
1669
|
|
|
1668
1670
|
static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
|
|
@@ -1708,7 +1710,8 @@ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_
|
|
|
1708
1710
|
struct ggml_tensor * dst = node_copies[id];
|
|
1709
1711
|
if (dst->view_src != NULL) {
|
|
1710
1712
|
graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
|
|
1711
|
-
ggml_backend_view_init(dst);
|
|
1713
|
+
enum ggml_status status = ggml_backend_view_init(dst);
|
|
1714
|
+
GGML_ASSERT(status == GGML_STATUS_SUCCESS);
|
|
1712
1715
|
}
|
|
1713
1716
|
else {
|
|
1714
1717
|
ggml_backend_tensor_copy(src, dst);
|
|
@@ -1823,7 +1826,6 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
|
|
|
1823
1826
|
assert(g1->n_nodes == g2->n_nodes);
|
|
1824
1827
|
|
|
1825
1828
|
for (int i = 0; i < g1->n_nodes; i++) {
|
|
1826
|
-
//printf("eval %d/%d\n", i, g1->n_nodes);
|
|
1827
1829
|
struct ggml_tensor * t1 = g1->nodes[i];
|
|
1828
1830
|
struct ggml_tensor * t2 = g2->nodes[i];
|
|
1829
1831
|
|
|
@@ -2790,10 +2790,14 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|
|
2790
2790
|
(char*)output_buffer + batch1 * output_stride, ACL_FLOAT16,
|
|
2791
2791
|
output_elem_size, output_ne, output_nb, 2, ACL_FORMAT_ND,
|
|
2792
2792
|
output_ne_offset);
|
|
2793
|
+
int64_t antiquantGroupSize = 0;
|
|
2794
|
+
if (src0->ne[0] > QK8_0) {
|
|
2795
|
+
antiquantGroupSize = QK8_0;
|
|
2796
|
+
}
|
|
2793
2797
|
|
|
2794
2798
|
ACL_CHECK(aclnnWeightQuantBatchMatmulV2GetWorkspaceSize(
|
|
2795
2799
|
acl_input_tensor, acl_weight_tensor, acl_scale_tensor, nullptr,
|
|
2796
|
-
nullptr, nullptr, nullptr,
|
|
2800
|
+
nullptr, nullptr, nullptr, antiquantGroupSize, acl_output_tensor,
|
|
2797
2801
|
&workspaceSize, &executor));
|
|
2798
2802
|
if (workspaceAddr == nullptr) {
|
|
2799
2803
|
workspaceAddr = workspace_allocator.alloc(workspaceSize);
|
|
@@ -2833,7 +2837,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|
|
2833
2837
|
|
|
2834
2838
|
ACL_CHECK(aclnnWeightQuantBatchMatmulV2GetWorkspaceSize(
|
|
2835
2839
|
acl_input_tensor, acl_weight_tensor, acl_scale_tensor,
|
|
2836
|
-
nullptr, nullptr, nullptr, nullptr,
|
|
2840
|
+
nullptr, nullptr, nullptr, nullptr, antiquantGroupSize,
|
|
2837
2841
|
acl_output_tensor, &workspaceSize, &executor));
|
|
2838
2842
|
ACL_CHECK(aclnnWeightQuantBatchMatmulV2(
|
|
2839
2843
|
workspaceAddr, workspaceSize, executor, ctx.stream()));
|