@fugood/llama.node 0.3.14 → 0.3.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +1 -1
- package/src/llama.cpp/.github/workflows/build.yml +30 -1
- package/src/llama.cpp/CMakeLists.txt +9 -1
- package/src/llama.cpp/cmake/common.cmake +2 -0
- package/src/llama.cpp/common/arg.cpp +20 -2
- package/src/llama.cpp/common/common.cpp +6 -3
- package/src/llama.cpp/common/speculative.cpp +4 -4
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +2 -2
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +4 -4
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -6
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/main.cpp +6 -6
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -5
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
- package/src/llama.cpp/examples/run/run.cpp +91 -46
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
- package/src/llama.cpp/examples/server/server.cpp +37 -15
- package/src/llama.cpp/examples/server/utils.hpp +3 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/tts/tts.cpp +20 -9
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +24 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -28
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +15 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +1493 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +150 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +284 -29
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +95 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +35 -12
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +93 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +12 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +12 -43
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +109 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +19 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +398 -158
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +7 -2
- package/src/llama.cpp/ggml/src/ggml.c +85 -2
- package/src/llama.cpp/include/llama.h +86 -22
- package/src/llama.cpp/src/CMakeLists.txt +5 -2
- package/src/llama.cpp/src/llama-adapter.cpp +19 -20
- package/src/llama.cpp/src/llama-adapter.h +11 -9
- package/src/llama.cpp/src/llama-arch.cpp +103 -16
- package/src/llama.cpp/src/llama-arch.h +18 -0
- package/src/llama.cpp/src/llama-batch.h +2 -2
- package/src/llama.cpp/src/llama-context.cpp +2253 -1222
- package/src/llama.cpp/src/llama-context.h +214 -77
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +1662 -0
- package/src/llama.cpp/src/llama-graph.h +574 -0
- package/src/llama.cpp/src/llama-hparams.cpp +8 -0
- package/src/llama.cpp/src/llama-hparams.h +9 -0
- package/src/llama.cpp/src/llama-io.cpp +15 -0
- package/src/llama.cpp/src/llama-io.h +35 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
- package/src/llama.cpp/src/llama-kv-cache.h +178 -110
- package/src/llama.cpp/src/llama-memory.cpp +1 -0
- package/src/llama.cpp/src/llama-memory.h +21 -0
- package/src/llama.cpp/src/llama-model.cpp +8244 -173
- package/src/llama.cpp/src/llama-model.h +34 -1
- package/src/llama.cpp/src/llama-quant.cpp +10 -1
- package/src/llama.cpp/src/llama.cpp +51 -9984
- package/src/llama.cpp/tests/test-backend-ops.cpp +145 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
include(CheckCXXCompilerFlag)
|
|
2
|
+
include("../cmake/common.cmake")
|
|
2
3
|
|
|
3
4
|
add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
|
|
4
5
|
|
|
@@ -24,33 +25,6 @@ if (NOT MSVC)
|
|
|
24
25
|
endif()
|
|
25
26
|
endif()
|
|
26
27
|
|
|
27
|
-
function(ggml_get_flags CCID CCVER)
|
|
28
|
-
set(C_FLAGS "")
|
|
29
|
-
set(CXX_FLAGS "")
|
|
30
|
-
|
|
31
|
-
if (CCID MATCHES "Clang")
|
|
32
|
-
set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
|
|
33
|
-
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
|
|
34
|
-
|
|
35
|
-
if (
|
|
36
|
-
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
|
|
37
|
-
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
|
|
38
|
-
)
|
|
39
|
-
list(APPEND C_FLAGS -Wdouble-promotion)
|
|
40
|
-
endif()
|
|
41
|
-
elseif (CCID STREQUAL "GNU")
|
|
42
|
-
set(C_FLAGS -Wdouble-promotion)
|
|
43
|
-
set(CXX_FLAGS -Wno-array-bounds)
|
|
44
|
-
|
|
45
|
-
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
|
|
46
|
-
list(APPEND CXX_FLAGS -Wextra-semi)
|
|
47
|
-
endif()
|
|
48
|
-
endif()
|
|
49
|
-
|
|
50
|
-
set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
|
|
51
|
-
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
|
|
52
|
-
endfunction()
|
|
53
|
-
|
|
54
28
|
if (GGML_FATAL_WARNINGS)
|
|
55
29
|
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
|
56
30
|
list(APPEND C_FLAGS -Werror)
|
|
@@ -102,7 +76,11 @@ if (GGML_CCACHE)
|
|
|
102
76
|
set(GGML_CCACHE_VARIANT sccache)
|
|
103
77
|
endif()
|
|
104
78
|
# TODO: should not be set globally
|
|
105
|
-
|
|
79
|
+
if (GGML_SYCL AND GGML_CCACHE_FOUND AND WIN32)
|
|
80
|
+
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "ccache compiler_type=icl")
|
|
81
|
+
else ()
|
|
82
|
+
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}")
|
|
83
|
+
endif ()
|
|
106
84
|
set(ENV{CCACHE_SLOPPINESS} time_macros)
|
|
107
85
|
message(STATUS "${GGML_CCACHE_VARIANT} found, compilation results will be cached. Disable with GGML_CCACHE=OFF.")
|
|
108
86
|
else()
|
|
@@ -351,6 +329,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Android")
|
|
|
351
329
|
target_link_libraries(ggml-base PRIVATE dl)
|
|
352
330
|
endif()
|
|
353
331
|
|
|
332
|
+
if(CMAKE_SYSTEM_NAME MATCHES "visionOS")
|
|
333
|
+
target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE)
|
|
334
|
+
endif()
|
|
335
|
+
|
|
354
336
|
if (BUILD_SHARED_LIBS)
|
|
355
337
|
foreach (target ggml-base ggml)
|
|
356
338
|
set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
@@ -2790,10 +2790,14 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|
|
2790
2790
|
(char*)output_buffer + batch1 * output_stride, ACL_FLOAT16,
|
|
2791
2791
|
output_elem_size, output_ne, output_nb, 2, ACL_FORMAT_ND,
|
|
2792
2792
|
output_ne_offset);
|
|
2793
|
+
int64_t antiquantGroupSize = 0;
|
|
2794
|
+
if (src0->ne[0] > QK8_0) {
|
|
2795
|
+
antiquantGroupSize = QK8_0;
|
|
2796
|
+
}
|
|
2793
2797
|
|
|
2794
2798
|
ACL_CHECK(aclnnWeightQuantBatchMatmulV2GetWorkspaceSize(
|
|
2795
2799
|
acl_input_tensor, acl_weight_tensor, acl_scale_tensor, nullptr,
|
|
2796
|
-
nullptr, nullptr, nullptr, QK8_0, acl_output_tensor,
|
|
2800
|
+
nullptr, nullptr, nullptr, antiquantGroupSize, acl_output_tensor,
|
|
2797
2801
|
&workspaceSize, &executor));
|
|
2798
2802
|
if (workspaceAddr == nullptr) {
|
|
2799
2803
|
workspaceAddr = workspace_allocator.alloc(workspaceSize);
|
|
@@ -2833,7 +2837,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|
|
2833
2837
|
|
|
2834
2838
|
ACL_CHECK(aclnnWeightQuantBatchMatmulV2GetWorkspaceSize(
|
|
2835
2839
|
acl_input_tensor, acl_weight_tensor, acl_scale_tensor,
|
|
2836
|
-
nullptr, nullptr, nullptr, nullptr, QK8_0,
|
|
2840
|
+
nullptr, nullptr, nullptr, nullptr, antiquantGroupSize,
|
|
2837
2841
|
acl_output_tensor, &workspaceSize, &executor));
|
|
2838
2842
|
ACL_CHECK(aclnnWeightQuantBatchMatmulV2(
|
|
2839
2843
|
workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
@@ -1689,11 +1689,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
|
1689
1689
|
case GGML_OP_MUL_MAT: {
|
|
1690
1690
|
switch (op->src[0]->type) {
|
|
1691
1691
|
case GGML_TYPE_Q8_0:
|
|
1692
|
-
// Current groupsize should not be greater than k-1 in
|
|
1693
|
-
// aclnnWeightQuantBatchMatmulV2GetWorkspaceSize
|
|
1694
|
-
if (op->src[0]->ne[0] <= QK8_0) {
|
|
1695
|
-
return false;
|
|
1696
|
-
}
|
|
1697
1692
|
case GGML_TYPE_F16:
|
|
1698
1693
|
case GGML_TYPE_F32:
|
|
1699
1694
|
case GGML_TYPE_Q4_0:
|
|
@@ -287,17 +287,25 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
287
287
|
endif()
|
|
288
288
|
endif()
|
|
289
289
|
endif()
|
|
290
|
-
elseif (${CMAKE_SYSTEM_PROCESSOR}
|
|
290
|
+
elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR "${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ")
|
|
291
291
|
message(STATUS "PowerPC detected")
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
292
|
+
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
|
293
|
+
file(READ "/proc/cpuinfo" POWER10_M)
|
|
294
|
+
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc")
|
|
295
|
+
execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
|
|
296
|
+
endif()
|
|
297
|
+
|
|
298
|
+
string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M}")
|
|
299
|
+
string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")
|
|
300
|
+
|
|
301
|
+
if (EXTRACTED_NUMBER GREATER_EQUAL 10)
|
|
302
|
+
list(APPEND ARCH_FLAGS -mcpu=power10 -mpowerpc64)
|
|
303
|
+
elseif (EXTRACTED_NUMBER EQUAL 9)
|
|
304
|
+
list(APPEND ARCH_FLAGS -mcpu=power9 -mpowerpc64)
|
|
297
305
|
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
|
|
298
306
|
list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
|
|
299
307
|
else()
|
|
300
|
-
list(APPEND ARCH_FLAGS -mcpu=
|
|
308
|
+
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
|
|
301
309
|
endif()
|
|
302
310
|
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
|
|
303
311
|
message(STATUS "loongarch64 detected")
|