@fugood/llama.node 1.3.0-rc.6 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +12 -2
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +8 -9
- package/src/llama.cpp/common/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/arg.cpp +39 -1001
- package/src/llama.cpp/common/arg.h +2 -2
- package/src/llama.cpp/common/chat.cpp +216 -2
- package/src/llama.cpp/common/chat.h +1 -0
- package/src/llama.cpp/common/common.cpp +33 -0
- package/src/llama.cpp/common/common.h +13 -0
- package/src/llama.cpp/common/download.cpp +1054 -0
- package/src/llama.cpp/common/download.h +55 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +19 -3
- package/src/llama.cpp/ggml/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/include/ggml-hexagon.h +19 -0
- package/src/llama.cpp/ggml/include/ggml.h +2 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +7 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +10 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +0 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +172 -35
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +82 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +25 -25
- package/src/llama.cpp/include/llama.h +7 -3
- package/src/llama.cpp/src/CMakeLists.txt +95 -0
- package/src/llama.cpp/src/llama-arch.cpp +108 -0
- package/src/llama.cpp/src/llama-arch.h +11 -0
- package/src/llama.cpp/src/llama-batch.cpp +63 -31
- package/src/llama.cpp/src/llama-batch.h +12 -1
- package/src/llama.cpp/src/llama-chat.cpp +32 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +44 -16
- package/src/llama.cpp/src/llama-context.h +5 -5
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +12 -7
- package/src/llama.cpp/src/llama-hparams.cpp +11 -1
- package/src/llama.cpp/src/llama-hparams.h +6 -0
- package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +3 -1
- package/src/llama.cpp/src/llama-kv-cache.cpp +56 -21
- package/src/llama.cpp/src/llama-kv-cache.h +2 -4
- package/src/llama.cpp/src/llama-kv-cells.h +44 -2
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +18 -14
- package/src/llama.cpp/src/llama-memory-recurrent.h +2 -2
- package/src/llama.cpp/src/llama-model.cpp +350 -13194
- package/src/llama.cpp/src/llama-model.h +9 -2
- package/src/llama.cpp/src/llama-quant.cpp +1 -1
- package/src/llama.cpp/src/llama-vocab.cpp +5 -0
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/models/apertus.cpp +125 -0
- package/src/llama.cpp/src/models/arcee.cpp +135 -0
- package/src/llama.cpp/src/models/arctic.cpp +138 -0
- package/src/llama.cpp/src/models/arwkv7.cpp +86 -0
- package/src/llama.cpp/src/models/baichuan.cpp +122 -0
- package/src/llama.cpp/src/models/bailingmoe.cpp +144 -0
- package/src/llama.cpp/src/models/bailingmoe2.cpp +135 -0
- package/src/llama.cpp/src/models/bert.cpp +176 -0
- package/src/llama.cpp/src/models/bitnet.cpp +160 -0
- package/src/llama.cpp/src/models/bloom.cpp +101 -0
- package/src/llama.cpp/src/models/chameleon.cpp +178 -0
- package/src/llama.cpp/src/models/chatglm.cpp +132 -0
- package/src/llama.cpp/src/models/codeshell.cpp +111 -0
- package/src/llama.cpp/src/models/cogvlm.cpp +100 -0
- package/src/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
- package/src/llama.cpp/src/models/command-r.cpp +122 -0
- package/src/llama.cpp/src/models/dbrx.cpp +123 -0
- package/src/llama.cpp/src/models/deci.cpp +135 -0
- package/src/llama.cpp/src/models/deepseek.cpp +144 -0
- package/src/llama.cpp/src/models/deepseek2.cpp +236 -0
- package/src/llama.cpp/src/models/dots1.cpp +134 -0
- package/src/llama.cpp/src/models/dream.cpp +105 -0
- package/src/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/src/llama.cpp/src/models/ernie4-5.cpp +111 -0
- package/src/llama.cpp/src/models/exaone.cpp +114 -0
- package/src/llama.cpp/src/models/exaone4.cpp +123 -0
- package/src/llama.cpp/src/models/falcon-h1.cpp +113 -0
- package/src/llama.cpp/src/models/falcon.cpp +120 -0
- package/src/llama.cpp/src/models/gemma-embedding.cpp +120 -0
- package/src/llama.cpp/src/models/gemma.cpp +112 -0
- package/src/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
- package/src/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
- package/src/llama.cpp/src/models/glm4-moe.cpp +153 -0
- package/src/llama.cpp/src/models/glm4.cpp +127 -0
- package/src/llama.cpp/src/models/gpt2.cpp +105 -0
- package/src/llama.cpp/src/models/gptneox.cpp +144 -0
- package/src/llama.cpp/src/models/granite-hybrid.cpp +196 -0
- package/src/llama.cpp/src/models/granite.cpp +211 -0
- package/src/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
- package/src/llama.cpp/src/models/grok.cpp +159 -0
- package/src/llama.cpp/src/models/grovemoe.cpp +141 -0
- package/src/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
- package/src/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
- package/src/llama.cpp/src/models/internlm2.cpp +120 -0
- package/src/llama.cpp/src/models/jais.cpp +86 -0
- package/src/llama.cpp/src/models/jamba.cpp +106 -0
- package/src/llama.cpp/src/models/lfm2.cpp +173 -0
- package/src/llama.cpp/src/models/llada-moe.cpp +122 -0
- package/src/llama.cpp/src/models/llada.cpp +99 -0
- package/src/llama.cpp/src/models/llama-iswa.cpp +174 -0
- package/src/llama.cpp/src/models/llama.cpp +155 -0
- package/src/llama.cpp/src/models/mamba.cpp +55 -0
- package/src/llama.cpp/src/models/minicpm3.cpp +199 -0
- package/src/llama.cpp/src/models/minimax-m2.cpp +124 -0
- package/src/llama.cpp/src/models/models.h +481 -0
- package/src/llama.cpp/src/models/mpt.cpp +126 -0
- package/src/llama.cpp/src/models/nemotron-h.cpp +121 -0
- package/src/llama.cpp/src/models/nemotron.cpp +122 -0
- package/src/llama.cpp/src/models/neo-bert.cpp +104 -0
- package/src/llama.cpp/src/models/olmo.cpp +121 -0
- package/src/llama.cpp/src/models/olmo2.cpp +150 -0
- package/src/llama.cpp/src/models/olmoe.cpp +124 -0
- package/src/llama.cpp/src/models/openai-moe-iswa.cpp +123 -0
- package/src/llama.cpp/src/models/openelm.cpp +124 -0
- package/src/llama.cpp/src/models/orion.cpp +123 -0
- package/src/llama.cpp/src/models/pangu-embedded.cpp +121 -0
- package/src/llama.cpp/src/models/phi2.cpp +121 -0
- package/src/llama.cpp/src/models/phi3.cpp +152 -0
- package/src/llama.cpp/src/models/plamo.cpp +110 -0
- package/src/llama.cpp/src/models/plamo2.cpp +316 -0
- package/src/llama.cpp/src/models/plm.cpp +168 -0
- package/src/llama.cpp/src/models/qwen.cpp +108 -0
- package/src/llama.cpp/src/models/qwen2.cpp +117 -0
- package/src/llama.cpp/src/models/qwen2moe.cpp +151 -0
- package/src/llama.cpp/src/models/qwen2vl.cpp +117 -0
- package/src/llama.cpp/src/models/qwen3.cpp +117 -0
- package/src/llama.cpp/src/models/qwen3moe.cpp +124 -0
- package/src/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
- package/src/llama.cpp/src/models/qwen3vl.cpp +141 -0
- package/src/llama.cpp/src/models/refact.cpp +94 -0
- package/src/llama.cpp/src/models/rwkv6-base.cpp +162 -0
- package/src/llama.cpp/src/models/rwkv6.cpp +94 -0
- package/src/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/src/llama.cpp/src/models/rwkv7-base.cpp +135 -0
- package/src/llama.cpp/src/models/rwkv7.cpp +90 -0
- package/src/llama.cpp/src/models/seed-oss.cpp +124 -0
- package/src/llama.cpp/src/models/smallthinker.cpp +120 -0
- package/src/llama.cpp/src/models/smollm3.cpp +128 -0
- package/src/llama.cpp/src/models/stablelm.cpp +146 -0
- package/src/llama.cpp/src/models/starcoder.cpp +100 -0
- package/src/llama.cpp/src/models/starcoder2.cpp +121 -0
- package/src/llama.cpp/src/models/t5-dec.cpp +166 -0
- package/src/llama.cpp/src/models/t5-enc.cpp +96 -0
- package/src/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/src/llama.cpp/src/models/xverse.cpp +108 -0
package/CMakeLists.txt
CHANGED

@@ -64,13 +64,23 @@ endif()
 # Improve speed
 if(CMAKE_BUILD_TYPE STREQUAL "Release")
   if (MSVC)
-
+    # Enable parallel compilation for all MSVC builds
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /MP")
+
+    if (NOT GGML_VULKAN AND NOT GGML_CUDA)
+      # Full optimization with LTCG for default builds
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Ob2 /Oi /Ot /Oy /GL")
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /Ob2 /Oi /Ot /Oy /GL")
     set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} /LTCG")
-
+    elseif(GGML_VULKAN)
+      # Reduced optimization for Vulkan builds
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O1 /Ob1 /bigobj")
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O1 /Ob1 /bigobj")
+    else()
+      # Faster linking for CUDA builds (no LTCG)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Ob2 /Oi")
+      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /Ob2 /Oi")
   endif()
 else()
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -funroll-loops -flto=auto")
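The three MSVC branches are selected at configure time by the standard llama.cpp backend options. As a rough sketch only (the helper below is hypothetical and not part of this package's scripts; GGML_VULKAN and GGML_CUDA are the real options referenced in the diff), a build wrapper could pick the defines that drive each branch like so:

// Hypothetical configure helper: maps a binary variant to the CMake
// defines that select the MSVC flag branches above (default -> /GL + /LTCG,
// vulkan -> /O1 /Ob1 /bigobj, cuda -> /O2 without LTCG).
import { execFileSync } from 'node:child_process';

const variantDefines: Record<'default' | 'vulkan' | 'cuda', string[]> = {
  default: [],
  vulkan: ['-DGGML_VULKAN=ON'],
  cuda: ['-DGGML_CUDA=ON'],
};

function configure(variant: keyof typeof variantDefines): void {
  // CMAKE_BUILD_TYPE=Release is required to enter the optimization block at all.
  execFileSync(
    'cmake',
    ['-B', `build-${variant}`, '-DCMAKE_BUILD_TYPE=Release', ...variantDefines[variant]],
    { stdio: 'inherit' },
  );
}

configure('cuda'); // skips /GL + /LTCG, trading peak optimization for faster links

Per the diff's own comments, whole-program optimization (/GL plus /LTCG) is reserved for CPU-only binaries because it makes the large Vulkan and CUDA objects slow to link, while /bigobj raises the per-object section limit that Vulkan builds tend to hit.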
package/package.json
CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.0-rc.6",
+  "version": "1.3.1",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,19 +72,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.0-rc.6",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.0-rc.6",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.0-rc.6",
-    "@fugood/node-llama-linux-arm64": "1.3.0-rc.6",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.0-rc.6",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.0-rc.6",
-    "@fugood/node-llama-win32-x64": "1.3.0-rc.6",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.0-rc.6",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.0-rc.6",
-    "@fugood/node-llama-win32-arm64": "1.3.0-rc.6",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.0-rc.6",
-    "@fugood/node-llama-darwin-x64": "1.3.0-rc.6",
-    "@fugood/node-llama-darwin-arm64": "1.3.0-rc.6"
+    "@fugood/node-llama-linux-x64": "1.3.1",
+    "@fugood/node-llama-linux-x64-vulkan": "1.3.1",
+    "@fugood/node-llama-linux-x64-cuda": "1.3.1",
+    "@fugood/node-llama-linux-arm64": "1.3.1",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.3.1",
+    "@fugood/node-llama-linux-arm64-cuda": "1.3.1",
+    "@fugood/node-llama-win32-x64": "1.3.1",
+    "@fugood/node-llama-win32-x64-vulkan": "1.3.1",
+    "@fugood/node-llama-win32-x64-cuda": "1.3.1",
+    "@fugood/node-llama-win32-arm64": "1.3.1",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.3.1",
+    "@fugood/node-llama-darwin-x64": "1.3.1",
+    "@fugood/node-llama-darwin-arm64": "1.3.1"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
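Every platform-specific prebuilt binding is published as an optional dependency pinned to exactly the same version, so npm installs only the package matching the host and the native binary stays in lockstep with the JS wrapper. A minimal sketch of how such names are typically resolved at runtime (the resolver and the fallback path below are illustrative assumptions, not the package's actual loader):

// Illustrative resolver for the platform packages listed above.
// process.platform yields 'linux' | 'win32' | 'darwin' and process.arch
// yields 'x64' | 'arm64', matching the package name suffixes.
function prebuiltPackageName(variant?: 'vulkan' | 'cuda'): string {
  const base = `@fugood/node-llama-${process.platform}-${process.arch}`;
  return variant ? `${base}-${variant}` : base;
}

function loadBinding(variant?: 'vulkan' | 'cuda'): unknown {
  try {
    // e.g. '@fugood/node-llama-linux-x64-cuda' on a CUDA-enabled Linux x64 host
    return require(prebuiltPackageName(variant));
  } catch {
    return require('./build/Release/llama-node.node'); // assumed local-build fallback
  }
}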
package/scripts/llama.cpp.patch
CHANGED

@@ -21,7 +21,7 @@ index fe290bf8f..d377e29b9 100644
 
 #
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index 
+index 938872e82..6364f173f 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
 @@ -6,9 +6,6 @@
@@ -31,10 +31,10 @@ index 8587140e1..7931a31a1 100644
 -#include <minja/chat-template.hpp>
 -#include <minja/minja.hpp>
 -
+ #include <algorithm>
  #include <cstdio>
- #include <
-
-@@ -123,16 +120,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
+ #include <cctype>
+@@ -126,16 +123,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
      return diffs;
  }
 
@@ -51,18 +51,17 @@ index 8587140e1..7931a31a1 100644
  struct templates_params {
      json messages;
      json tools;
-@@ -
-     if (additional_context) {
+@@ -811,7 +798,7 @@ static std::string apply(
          tmpl_inputs.extra_context.merge_patch(*additional_context);
      }
-
+     // TODO: add flag to control date/time, if only for testing purposes.
 -    // tmpl_inputs.now = std::chrono::system_clock::now();
 +    tmpl_inputs.now = inputs.now;
 
      minja::chat_template_options tmpl_opts;
      // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
 diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
-index 
+index 50efb0d4e..f471a84c7 100644
 --- a/src/llama.cpp/common/chat.h
 +++ b/src/llama.cpp/common/chat.h
 @@ -9,7 +9,18 @@
@@ -110,7 +109,7 @@ index a8cb630ea..0919ec5d3 100644
      int32_t n_ctx = 4096; // context size
      int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
 diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-index 
+index 23ec8bb08..33c93cba7 100644
 --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)