@fugood/llama.node 1.3.8 → 1.4.0
- package/lib/binding.js +18 -1
- package/lib/binding.ts +19 -1
- package/lib/index.js +3 -3
- package/lib/index.ts +1 -1
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +2 -2
- package/src/LlamaCompletionWorker.cpp +2 -2
- package/src/llama.cpp/common/arg.cpp +1 -1
- package/src/llama.cpp/common/chat-parser.cpp +968 -0
- package/src/llama.cpp/common/chat.cpp +0 -952
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +336 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +11 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +234 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +6 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +48 -3
- package/src/llama.cpp/src/llama-arch.h +2 -0
- package/src/llama.cpp/src/llama-context.cpp +6 -2
- package/src/llama.cpp/src/llama-hparams.h +1 -1
- package/src/llama.cpp/src/llama-model.cpp +102 -5
- package/src/llama.cpp/src/llama-model.h +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +13 -5
- package/src/llama.cpp/src/models/lfm2.cpp +5 -3
- package/src/llama.cpp/src/models/models.h +51 -1
- package/src/llama.cpp/src/models/qwen3next.cpp +1042 -0
package/lib/binding.js
CHANGED

@@ -41,8 +41,12 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
         step((generator = generator.apply(thisArg, _arguments || [])).next());
     });
 };
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.isLibVariantAvailable = exports.loadModule = void 0;
+const path_1 = __importDefault(require("path"));
 const getPlatformPackageName = (variant) => {
     const platform = process.platform;
     const arch = process.arch;
@@ -58,7 +62,20 @@ const loadPlatformPackage = (packageName) => __awaiter(void 0, void 0, void 0, f
     }
 });
 const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
-    …
+    const packageName = getPlatformPackageName(variant);
+    // Set ADSP_LIBRARY_PATH for load HTP libs
+    if (variant === 'snapdragon') {
+        const adspLibraryPath = process.env.ADSP_LIBRARY_PATH;
+        if (!adspLibraryPath) {
+            try {
+                process.env.ADSP_LIBRARY_PATH = path_1.default.dirname(require.resolve(packageName));
+            }
+            catch (_a) {
+                /* no-op */
+            }
+        }
+    }
+    let module = yield loadPlatformPackage(packageName);
     if (module) {
         return module;
     }
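The context lines above show `getPlatformPackageName` deriving the prebuilt-binary package name from `process.platform` and `process.arch`. As a rough sketch of that mapping — assuming the naming scheme visible in the `optionalDependencies` list below, not the package's actual implementation:

```ts
// Sketch only: mirrors the names in optionalDependencies
// (e.g. @fugood/node-llama-linux-arm64-snapdragon). The real
// getPlatformPackageName in lib/binding may validate supported
// combinations and handle defaults differently.
const getPlatformPackageName = (variant?: string): string => {
  const base = `@fugood/node-llama-${process.platform}-${process.arch}`
  // Accelerated variants (vulkan, cuda, snapdragon) append a suffix.
  return variant && variant !== 'default' ? `${base}-${variant}` : base
}
```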
package/lib/binding.ts
CHANGED

@@ -1,3 +1,5 @@
+import path from 'path'
+
 export type MessagePart = {
   type: string
   text?: string
@@ -600,7 +602,23 @@ const loadPlatformPackage = async (
 }
 
 export const loadModule = async (variant?: LibVariant): Promise<Module> => {
-  …
+  const packageName = getPlatformPackageName(variant)
+
+  // Set ADSP_LIBRARY_PATH for load HTP libs
+  if (variant === 'snapdragon') {
+    const adspLibraryPath = process.env.ADSP_LIBRARY_PATH
+    if (!adspLibraryPath) {
+      try {
+        process.env.ADSP_LIBRARY_PATH = path.dirname(
+          require.resolve(packageName),
+        )
+      } catch {
+        /* no-op */
+      }
+    }
+  }
+
+  let module = await loadPlatformPackage(packageName)
   if (module) {
     return module
   }
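The practical effect of the new `snapdragon` branch: when `ADSP_LIBRARY_PATH` is unset, `loadModule` points it at the directory of the resolved platform package so the Hexagon (HTP) libraries can be found; a value set by the caller is left untouched. A hypothetical consumer sketch (the deep import path and logging are illustrative, not documented API):

```ts
import { loadModule } from '@fugood/llama.node/lib/binding'

async function init() {
  // Setting the variable first opts out of the automatic fallback:
  // process.env.ADSP_LIBRARY_PATH = '/opt/htp-libs'
  const mod = await loadModule('snapdragon')

  // If it was unset, it now points at the directory of the resolved
  // @fugood/node-llama-linux-arm64-snapdragon package.
  console.log('ADSP_LIBRARY_PATH =', process.env.ADSP_LIBRARY_PATH)
  return mod
}
```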
package/lib/index.js
CHANGED

@@ -87,9 +87,9 @@ class LlamaContextWrapper {
         return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
     }
     getFormattedChat(messages, template, params) {
-        var _a;
+        var _a, _b;
         const { messages: chat, has_media, media_paths } = (0, utils_1.formatMediaChat)(messages);
-        const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+        const useJinja = this.isJinjaSupported() && ((_a = params === null || params === void 0 ? void 0 : params.jinja) !== null && _a !== void 0 ? _a : true);
         let tmpl;
         if (template)
             tmpl = template; // Force replace if provided
@@ -99,7 +99,7 @@ class LlamaContextWrapper {
             tools: params === null || params === void 0 ? void 0 : params.tools,
             parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
             tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
-            enable_thinking: (…
+            enable_thinking: (_b = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _b !== void 0 ? _b : true,
             add_generation_prompt: params === null || params === void 0 ? void 0 : params.add_generation_prompt,
             now: params === null || params === void 0 ? void 0 : params.now,
             chat_template_kwargs: (params === null || params === void 0 ? void 0 : params.chat_template_kwargs)
package/lib/index.ts
CHANGED

@@ -124,7 +124,7 @@ class LlamaContextWrapper {
   ): FormattedChatResult {
     const { messages: chat, has_media, media_paths } = formatMediaChat(messages)
 
-    const useJinja = this.isJinjaSupported() && params?.jinja
+    const useJinja = this.isJinjaSupported() && (params?.jinja ?? true)
     let tmpl
     if (template) tmpl = template // Force replace if provided
 
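Together with the compiled output above, the behavioral change is that `jinja` and `enable_thinking` now default to `true` when omitted (Jinja use is still gated by `isJinjaSupported()`); previously an omitted `jinja` was falsy and disabled Jinja formatting. A minimal sketch of the nullish-coalescing semantics, with the params type abbreviated:

```ts
type FormatParams = { jinja?: boolean; enable_thinking?: boolean }

const resolveDefaults = (params?: FormatParams) => ({
  // `?? true` falls back only on null/undefined,
  // so an explicit `false` still disables the feature.
  jinja: params?.jinja ?? true,
  enable_thinking: params?.enable_thinking ?? true,
})

console.log(resolveDefaults())                 // { jinja: true, enable_thinking: true }
console.log(resolveDefaults({ jinja: false })) // { jinja: false, enable_thinking: true }
```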
package/package.json
CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.8",
+  "version": "1.4.0",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.8",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.8",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.8",
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.3.8",
-    "@fugood/node-llama-linux-arm64": "1.3.8",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.8",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.8",
-    "@fugood/node-llama-win32-x64": "1.3.8",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.8",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.8",
-    "@fugood/node-llama-win32-arm64": "1.3.8",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.8",
-    "@fugood/node-llama-darwin-x64": "1.3.8",
-    "@fugood/node-llama-darwin-arm64": "1.3.8"
+    "@fugood/node-llama-linux-x64": "1.4.0",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.0",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.0",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.0",
+    "@fugood/node-llama-linux-arm64": "1.4.0",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.0",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.0",
+    "@fugood/node-llama-win32-x64": "1.4.0",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.0",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.0",
+    "@fugood/node-llama-win32-arm64": "1.4.0",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.0",
+    "@fugood/node-llama-darwin-x64": "1.4.0",
+    "@fugood/node-llama-darwin-arm64": "1.4.0"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch
CHANGED

@@ -21,7 +21,7 @@ index bb168e835..cfc0e2c2e 100644
 
 #
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index …
+index b4a0f985e..2383d2ea9 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
 @@ -6,9 +6,6 @@
@@ -51,7 +51,7 @@ index 6fa05a604..87dfa7a8b 100644
 struct templates_params {
     json messages;
     json tools;
-@@ -…
+@@ -709,7 +696,7 @@ static std::string apply(
     tmpl_inputs.extra_context.merge_patch(*additional_context);
 }
 // TODO: add flag to control date/time, if only for testing purposes.
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -10,14 +10,14 @@ Napi::Array TokenProbsToArray(Napi::Env env, llama_context* ctx, const std::vect
     const auto &prob = probs[i];
     Napi::Object token_obj = Napi::Object::New(env);
 
-    std::string token_str = …
+    std::string token_str = rnllama::tokens_to_output_formatted_string(ctx, prob.tok);
     token_obj.Set("content", Napi::String::New(env, token_str));
 
     Napi::Array token_probs = Napi::Array::New(env);
     for (size_t j = 0; j < prob.probs.size(); j++) {
       const auto &p = prob.probs[j];
       Napi::Object prob_obj = Napi::Object::New(env);
-      std::string tok_str = …
+      std::string tok_str = rnllama::tokens_to_output_formatted_string(ctx, p.tok);
       prob_obj.Set("tok_str", Napi::String::New(env, tok_str));
       prob_obj.Set("prob", Napi::Number::New(env, p.prob));
       token_probs.Set(j, prob_obj);
package/src/llama.cpp/common/arg.cpp
CHANGED

@@ -980,7 +980,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.kv_unified = true;
         }
-    ).set_env("…
+    ).set_env("LLAMA_ARG_KV_UNIFIED"));
     add_opt(common_arg(
         {"--no-context-shift"},
         string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),