@fugood/llama.node 1.3.8 → 1.4.1
- package/lib/binding.js +25 -18
- package/lib/binding.ts +19 -1
- package/lib/index.js +3 -3
- package/lib/index.ts +1 -1
- package/package.json +17 -17
- package/scripts/llama.cpp.patch +53 -4
- package/src/LlamaCompletionWorker.cpp +2 -2
- package/src/LlamaContext.cpp +6 -1
- package/src/llama.cpp/common/arg.cpp +1 -1
- package/src/llama.cpp/common/chat-parser.cpp +968 -0
- package/src/llama.cpp/common/chat.cpp +0 -952
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +336 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +11 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +234 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +6 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +48 -3
- package/src/llama.cpp/src/llama-arch.h +2 -0
- package/src/llama.cpp/src/llama-context.cpp +6 -2
- package/src/llama.cpp/src/llama-hparams.h +1 -1
- package/src/llama.cpp/src/llama-model.cpp +102 -5
- package/src/llama.cpp/src/llama-model.h +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +13 -5
- package/src/llama.cpp/src/models/lfm2.cpp +5 -3
- package/src/llama.cpp/src/models/models.h +51 -1
- package/src/llama.cpp/src/models/qwen3next.cpp +1042 -0
package/lib/binding.js
CHANGED

@@ -15,23 +15,13 @@ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (
 }) : function(o, v) {
     o["default"] = v;
 });
-var __importStar = (this && this.__importStar) || (function () {
-    var ownKeys = function(o) {
-        ownKeys = Object.getOwnPropertyNames || function (o) {
-            var ar = [];
-            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
-            return ar;
-        };
-        return ownKeys(o);
-    };
-    return function (mod) {
-        if (mod && mod.__esModule) return mod;
-        var result = {};
-        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
-        __setModuleDefault(result, mod);
-        return result;
-    };
-})();
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
 var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
     return new (P || (P = Promise))(function (resolve, reject) {
@@ -41,8 +31,12 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
         step((generator = generator.apply(thisArg, _arguments || [])).next());
     });
 };
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.isLibVariantAvailable = exports.loadModule = void 0;
+const path_1 = __importDefault(require("path"));
 const getPlatformPackageName = (variant) => {
     const platform = process.platform;
     const arch = process.arch;
@@ -58,7 +52,20 @@ const loadPlatformPackage = (packageName) => __awaiter(void 0, void 0, void 0, f
     }
 });
 const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
-
+    const packageName = getPlatformPackageName(variant);
+    // Set ADSP_LIBRARY_PATH for load HTP libs
+    if (variant === 'snapdragon') {
+        const adspLibraryPath = process.env.ADSP_LIBRARY_PATH;
+        if (!adspLibraryPath) {
+            try {
+                process.env.ADSP_LIBRARY_PATH = path_1.default.dirname(require.resolve(packageName));
+            }
+            catch (_a) {
+                /* no-op */
+            }
+        }
+    }
+    let module = yield loadPlatformPackage(packageName);
     if (module) {
         return module;
     }
package/lib/binding.ts
CHANGED

@@ -1,3 +1,5 @@
+import path from 'path'
+
 export type MessagePart = {
   type: string
   text?: string
@@ -600,7 +602,23 @@ const loadPlatformPackage = async (
 }

 export const loadModule = async (variant?: LibVariant): Promise<Module> => {
-
+  const packageName = getPlatformPackageName(variant)
+
+  // Set ADSP_LIBRARY_PATH for load HTP libs
+  if (variant === 'snapdragon') {
+    const adspLibraryPath = process.env.ADSP_LIBRARY_PATH
+    if (!adspLibraryPath) {
+      try {
+        process.env.ADSP_LIBRARY_PATH = path.dirname(
+          require.resolve(packageName),
+        )
+      } catch {
+        /* no-op */
+      }
+    }
+  }
+
+  let module = await loadPlatformPackage(packageName)
   if (module) {
     return module
   }
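The change above wires up Hexagon (HTP) support at load time: when the snapdragon variant is requested and ADSP_LIBRARY_PATH is unset, loadModule points it at the directory of the resolved platform package so the DSP libraries can be found. A minimal consumer-side sketch, assuming loadModule is importable from the package root (the pre-seeding line is optional and illustrative):

    import { loadModule } from '@fugood/llama.node'

    const init = async () => {
      // Optionally pre-seed before loadModule() applies its fallback:
      // process.env.ADSP_LIBRARY_PATH = '/opt/htp/libs'
      const mod = await loadModule('snapdragon')
      // If it was unset, ADSP_LIBRARY_PATH now points at the install
      // dir of the resolved snapdragon platform package.
      console.log(process.env.ADSP_LIBRARY_PATH)
      return mod
    }

    init()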
package/lib/index.js
CHANGED

@@ -87,9 +87,9 @@ class LlamaContextWrapper {
         return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
     }
     getFormattedChat(messages, template, params) {
-        var _a;
+        var _a, _b;
        const { messages: chat, has_media, media_paths } = (0, utils_1.formatMediaChat)(messages);
-        const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+        const useJinja = this.isJinjaSupported() && ((_a = params === null || params === void 0 ? void 0 : params.jinja) !== null && _a !== void 0 ? _a : true);
        let tmpl;
        if (template)
            tmpl = template; // Force replace if provided
@@ -99,7 +99,7 @@ class LlamaContextWrapper {
            tools: params === null || params === void 0 ? void 0 : params.tools,
            parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
            tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
-            enable_thinking: (
+            enable_thinking: (_b = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _b !== void 0 ? _b : true,
            add_generation_prompt: params === null || params === void 0 ? void 0 : params.add_generation_prompt,
            now: params === null || params === void 0 ? void 0 : params.now,
            chat_template_kwargs: (params === null || params === void 0 ? void 0 : params.chat_template_kwargs)
package/lib/index.ts
CHANGED

@@ -124,7 +124,7 @@ class LlamaContextWrapper {
   ): FormattedChatResult {
     const { messages: chat, has_media, media_paths } = formatMediaChat(messages)

-    const useJinja = this.isJinjaSupported() && params?.jinja
+    const useJinja = this.isJinjaSupported() && (params?.jinja ?? true)
     let tmpl
     if (template) tmpl = template // Force replace if provided

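Both wrappers now treat jinja and enable_thinking as opt-out rather than opt-in: an omitted flag defaults to true via nullish coalescing, so only an explicit false disables Jinja templating or thinking blocks. A small sketch of just this defaulting rule — the param type is an assumed minimal shape, not the package's real one:

    // Assumed minimal shape, for illustration only.
    type FormatParams = { jinja?: boolean; enable_thinking?: boolean }

    const resolveJinja = (supported: boolean, params?: FormatParams) =>
      supported && (params?.jinja ?? true)

    console.log(resolveJinja(true))                   // true  — new default
    console.log(resolveJinja(true, { jinja: false })) // false — explicit opt-out
    console.log(resolveJinja(false))                  // false — no template support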
package/package.json
CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.8",
+  "version": "1.4.1",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-
-    "@fugood/node-llama-
-    "@fugood/node-llama-linux-
-    "@fugood/node-llama-linux-arm64-
-    "@fugood/node-llama-linux-arm64": "1.3.8",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.8",
-    "@fugood/node-llama-linux-
-    "@fugood/node-llama-
-    "@fugood/node-llama-
-    "@fugood/node-llama-win32-
-    "@fugood/node-llama-win32-arm64": "1.3.8",
-    "@fugood/node-llama-win32-
-    "@fugood/node-llama-
-    "@fugood/node-llama-
+    "@fugood/node-llama-darwin-arm64": "1.4.1",
+    "@fugood/node-llama-darwin-x64": "1.4.1",
+    "@fugood/node-llama-linux-arm64": "1.4.1",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.1",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.1",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.1",
+    "@fugood/node-llama-linux-x64": "1.4.1",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.1",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.1",
+    "@fugood/node-llama-win32-arm64": "1.4.1",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.1",
+    "@fugood/node-llama-win32-x64": "1.4.1",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.1",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.1"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
@@ -100,7 +100,7 @@
     "jest": "^29.7.0",
     "node-addon-api": "^8.0.0",
     "node-wav": "^0.0.2",
-    "release-it": "^
+    "release-it": "^19.0.6",
     "rimraf": "^6.0.1",
     "typescript": "^5.4.5",
     "wait-for-expect": "^3.0.2"
@@ -130,4 +130,4 @@
     "singleQuote": true,
     "printWidth": 80
   }
-}
+}
package/scripts/llama.cpp.patch
CHANGED

@@ -21,7 +21,7 @@ index bb168e835..cfc0e2c2e 100644
 
 #
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index 
+index b4a0f985e..2383d2ea9 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
 @@ -6,9 +6,6 @@
@@ -51,7 +51,7 @@ index 6fa05a604..87dfa7a8b 100644
 struct templates_params {
     json messages;
     json tools;
-@@ -
+@@ -709,7 +696,7 @@ static std::string apply(
     tmpl_inputs.extra_context.merge_patch(*additional_context);
 }
 // TODO: add flag to control date/time, if only for testing purposes.
@@ -122,10 +122,59 @@ index 7e53a57b7..a328d4db4 100644
 check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
 if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
 diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
-index 72a82a891..
+index 72a82a891..1b681f4dd 100644
 --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
 +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
-@@ -
+@@ -3216,11 +3216,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
+     GGML_UNUSED(dev);
+ }
+ 
++
++// ~2GB per session for now
++#define GGML_HEXAGON_SESSION_MEMORY_DEFAULT (2ULL * 1024 * 1024 * 1024)
++// Max to 3.5GB
++#define GGML_HEXAGON_SESSION_MEMORY_MAX (3ULL * 1024 * 1024 * 1024 + 512ULL * 1024 * 1024)
++
+ static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+-    // ~2GB per session for now
+-    *free = 2ULL * 1024 * 1024 * 1024;
+-    *total = *free;
++    const char * str_mem = getenv("GGML_HEXAGON_SESSION_MEMORY");
++    if (str_mem) {
++        *free = std::stoull(str_mem);
++        if (*free < GGML_HEXAGON_SESSION_MEMORY_DEFAULT) {
++            *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT;
++        } else if (*free > GGML_HEXAGON_SESSION_MEMORY_MAX) {
++            *free = GGML_HEXAGON_SESSION_MEMORY_MAX;
++        }
++    } else {
++        *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT;
++    }
+ 
++    *total = *free;
+     GGML_UNUSED(dev);
+ }
+ 
+@@ -3401,10 +3416,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+         }
+     }
+ 
++#if defined(__ANDROID__)
+     if(opt_arch < 75) {
+         opt_ndev = 1;
+-        GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
++        GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75 for Android.\n");
++    }
++#else
++    if(opt_arch < 73) {
++        opt_ndev = 1;
++        GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v73 for Linux and Windows.\n");
+     }
++#endif
+ 
+     GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
+ 
+@@ -3417,6 +3439,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
 } catch (std::exception const &exc) {
     GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
     devices[i].context = nullptr;
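The Hexagon part of the patch replaces the hard-coded ~2 GB session budget with a GGML_HEXAGON_SESSION_MEMORY environment variable, clamped to the 2 GiB–3.5 GiB range. From Node, the variable only needs to be set before the native module loads; a hedged sketch, where the byte values mirror the patch and the surrounding setup is illustrative:

    const GiB = 1024 ** 3

    // Request 3 GiB per Hexagon session; out-of-range values are clamped
    // by the backend (e.g. 4 GiB -> 3.5 GiB, 1 GiB -> 2 GiB).
    process.env.GGML_HEXAGON_SESSION_MEMORY = String(3 * GiB)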
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -10,14 +10,14 @@ Napi::Array TokenProbsToArray(Napi::Env env, llama_context* ctx, const std::vect
     const auto &prob = probs[i];
     Napi::Object token_obj = Napi::Object::New(env);

-    std::string token_str = 
+    std::string token_str = rnllama::tokens_to_output_formatted_string(ctx, prob.tok);
     token_obj.Set("content", Napi::String::New(env, token_str));

     Napi::Array token_probs = Napi::Array::New(env);
     for (size_t j = 0; j < prob.probs.size(); j++) {
       const auto &p = prob.probs[j];
       Napi::Object prob_obj = Napi::Object::New(env);
-      std::string tok_str = 
+      std::string tok_str = rnllama::tokens_to_output_formatted_string(ctx, p.tok);
       prob_obj.Set("tok_str", Napi::String::New(env, tok_str));
       prob_obj.Set("prob", Napi::Number::New(env, p.prob));
       token_probs.Set(j, prob_obj);
package/src/LlamaContext.cpp
CHANGED

@@ -321,15 +321,20 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)

   // Parse devices array
   if (options.Has("devices") && options.Get("devices").IsArray()) {
+    std::vector<ggml_backend_dev_t> devs;
     auto devices_array = options.Get("devices").As<Napi::Array>();
     for (size_t i = 0; i < devices_array.Length(); i++) {
       auto device_name = devices_array.Get(i).ToString().Utf8Value();
       auto * dev = ggml_backend_dev_by_name(device_name.c_str());
       if (dev) {
-
+        devs.push_back(dev);
       }
       // Skip invalid device names silently
     }
+    if (!devs.empty()) {
+      params.devices = devs;
+      params.devices.push_back(nullptr); // nullptr terminator required by llama.cpp
+    }
   }

   std::vector<common_adapter_lora_info> lora;
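With this change, params.devices is only overridden when at least one requested name resolves via ggml_backend_dev_by_name, and the resulting list is nullptr-terminated as llama.cpp requires; otherwise the default device selection stands. On the JS side the option is an array of backend device names — a hypothetical fragment, where only the devices key is grounded in the diff and the other names are illustrative:

    const contextOptions = {
      model: '/path/to/model.gguf', // assumed option name
      // Unknown names are skipped silently; if none resolve,
      // llama.cpp keeps its default devices.
      devices: ['Vulkan0'],         // hypothetical backend device name
    }

    console.log(contextOptions)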
package/src/llama.cpp/common/arg.cpp
CHANGED

@@ -980,7 +980,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.kv_unified = true;
         }
-    ).set_env("
+    ).set_env("LLAMA_ARG_KV_UNIFIED"));
     add_opt(common_arg(
         {"--no-context-shift"},
         string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),