@fugood/llama.node 0.3.9 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.js +2 -2
- package/lib/binding.ts +46 -8
- package/lib/index.ts +3 -1
- package/package.json +8 -1
- package/src/LlamaCompletionWorker.cpp +33 -6
- package/src/LlamaCompletionWorker.h +3 -1
- package/src/LlamaContext.cpp +292 -28
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +19 -2
- package/src/llama.cpp/.github/workflows/build.yml +289 -107
- package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
- package/src/llama.cpp/.github/workflows/docker.yml +2 -1
- package/src/llama.cpp/.github/workflows/server.yml +25 -2
- package/src/llama.cpp/CMakeLists.txt +10 -19
- package/src/llama.cpp/cmake/build-info.cmake +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +32 -0
- package/src/llama.cpp/common/arg.cpp +66 -16
- package/src/llama.cpp/common/chat-template.hpp +515 -0
- package/src/llama.cpp/common/chat.cpp +966 -0
- package/src/llama.cpp/common/chat.hpp +52 -0
- package/src/llama.cpp/common/common.cpp +159 -36
- package/src/llama.cpp/common/common.h +56 -14
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
- package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
- package/src/llama.cpp/common/llguidance.cpp +270 -0
- package/src/llama.cpp/common/log.cpp +1 -10
- package/src/llama.cpp/common/log.h +10 -0
- package/src/llama.cpp/common/minja.hpp +2868 -0
- package/src/llama.cpp/common/sampling.cpp +22 -1
- package/src/llama.cpp/common/sampling.h +3 -0
- package/src/llama.cpp/docs/build.md +54 -9
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
- package/src/llama.cpp/examples/llava/clip.cpp +133 -14
- package/src/llama.cpp/examples/llava/clip.h +2 -0
- package/src/llama.cpp/examples/llava/llava.cpp +22 -8
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
- package/src/llama.cpp/examples/main/main.cpp +26 -25
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
- package/src/llama.cpp/examples/run/run.cpp +224 -69
- package/src/llama.cpp/examples/server/server.cpp +252 -81
- package/src/llama.cpp/examples/server/utils.hpp +73 -21
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
- package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +23 -13
- package/src/llama.cpp/include/llama.h +14 -1
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/src/llama-arch.cpp +7 -2
- package/src/llama.cpp/src/llama-arch.h +3 -1
- package/src/llama.cpp/src/llama-chat.cpp +11 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +86 -6
- package/src/llama.cpp/src/llama-grammar.h +22 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +76 -6
- package/src/llama.cpp/src/llama-sampling.cpp +47 -4
- package/src/llama.cpp/src/llama-vocab.cpp +10 -4
- package/src/llama.cpp/src/llama.cpp +181 -123
- package/src/llama.cpp/tests/CMakeLists.txt +4 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
- package/src/llama.cpp/tests/test-chat.cpp +607 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/bin/** (16 prebuilt binaries listed above: llama-node.node for each platform/arch, plus node.lib import libraries on Windows)
CHANGED

Binary files; contents not shown.
package/lib/binding.js
CHANGED
@@ -52,11 +52,11 @@ const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
     try {
         if (variant && variant !== 'default') {
             setupEnv(variant);
-            return yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+            return (yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
         }
     }
     catch (_a) { } // ignore errors and try the common path
     setupEnv();
-    return yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+    return (yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
 });
 exports.loadModule = loadModule;
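The loader above resolves the native addon from a per-platform, per-variant folder under package/bin. A minimal sketch of that same path scheme, assuming a hypothetical helper named resolveBinaryPath that is not part of the package:

// Sketch only: mirrors the directory scheme used by loadModule above.
// `resolveBinaryPath` is an illustrative helper, not a package export.
import * as path from 'path'

const resolveBinaryPath = (variant?: string): string => {
  const dir =
    variant && variant !== 'default'
      ? `${process.platform}-${variant}` // e.g. linux-vulkan, win32-vulkan
      : process.platform                 // e.g. darwin, linux, win32
  return path.join('..', 'bin', dir, process.arch, 'llama-node.node')
}

// On an Apple Silicon machine this yields ../bin/darwin/arm64/llama-node.node
console.log(resolveBinaryPath())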
package/lib/binding.ts
CHANGED
@@ -2,11 +2,12 @@ import * as path from 'path'
 
 export type ChatMessage = {
   role: string
-
+  content: string
 }
 
 export type LlamaModelOptions = {
   model: string
+  chat_template?: string
   embedding?: boolean
   embd_normalize?: number
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -42,10 +43,24 @@ export type LlamaModelOptions = {
   lora_list?: { path: string; scaled: number }[]
 }
 
+export type CompletionResponseFormat = {
+  type: 'text' | 'json_object' | 'json_schema'
+  json_schema?: {
+    strict?: boolean
+    schema: object
+  }
+  schema?: object // for json_object type
+}
+
 export type LlamaCompletionOptions = {
   messages?: ChatMessage[]
+  jinja?: boolean
+  chat_template?: string
+  response_format?: CompletionResponseFormat
+  tools?: object
+  parallel_tool_calls?: boolean
+  tool_choice?: string
   prompt?: string
-  n_samples?: number
   temperature?: number
   top_k?: number
   top_p?: number
@@ -70,6 +85,9 @@ export type LlamaCompletionOptions = {
   seed?: number
   stop?: string[]
   grammar?: string
+  grammar_lazy?: boolean
+  grammar_triggers?: { word: string; at_start: boolean }[]
+  preserved_tokens?: string[]
 }
 
 export type LlamaCompletionResult = {
@@ -105,8 +123,21 @@ export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
   getModelInfo(): object
-  getFormattedChat(
-
+  getFormattedChat(
+    messages: ChatMessage[],
+    chat_template?: string,
+    params?: {
+      jinja?: boolean
+      response_format?: CompletionResponseFormat
+      tools?: object
+      parallel_tool_calls?: object
+      tool_choice?: string
+    },
+  ): object | string
+  completion(
+    options: LlamaCompletionOptions,
+    callback?: (token: LlamaCompletionToken) => void,
+  ): Promise<LlamaCompletionResult>
   stopCompletion(): void
   tokenize(text: string): Promise<TokenizeResult>
   detokenize(tokens: number[]): Promise<string>
@@ -129,7 +160,10 @@ export type LibVariant = 'default' | 'vulkan' | 'cuda'
 
 const setupEnv = (variant?: string) => {
   const postfix = variant ? `-${variant}` : ''
-  const binPath = path.resolve(
+  const binPath = path.resolve(
+    __dirname,
+    `../bin/${process.platform}${postfix}/${process.arch}/`,
+  )
   const systemPathEnv = process.env.PATH ?? process.env.Path ?? ''
   if (!systemPathEnv.includes(binPath)) {
     if (process.platform === 'win32') {
@@ -144,9 +178,13 @@ export const loadModule = async (variant?: LibVariant): Promise<Module> => {
   try {
     if (variant && variant !== 'default') {
       setupEnv(variant)
-      return await import(
+      return (await import(
+        `../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`
+      )) as Module
     }
   } catch {} // ignore errors and try the common path
   setupEnv()
-  return await import(
-}
+  return (await import(
+    `../bin/${process.platform}/${process.arch}/llama-node.node`
+  )) as Module
+}
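The options added above (jinja, tools, tool_choice, response_format, grammar_lazy/grammar_triggers) expose llama.cpp's chat-template and tool-calling support through the Node API. A minimal sketch of a completion call using them, assuming `ctx` is an already loaded LlamaContext; the weather tool is illustrative, not something shipped with the package:

// Sketch only: exercises the new completion options; `ctx` is assumed.
const result = await ctx.completion(
  {
    jinja: true, // render the prompt with the model's Jinja chat template
    messages: [{ role: 'user', content: 'What is the weather in Tokyo?' }],
    tools: [
      {
        type: 'function',
        function: {
          name: 'get_weather', // illustrative tool definition
          parameters: {
            type: 'object',
            properties: { city: { type: 'string' } },
            required: ['city'],
          },
        },
      },
    ],
    tool_choice: 'auto',
    response_format: { type: 'text' },
    temperature: 0.7,
  },
  () => {}, // optional per-token streaming callback
)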
package/lib/index.ts
CHANGED
@@ -9,7 +9,9 @@ export interface LlamaModelOptionsExtended extends LlamaModelOptions {
 
 const mods: { [key: string]: Module } = {}
 
-export const loadModel = async (
+export const loadModel = async (
+  options: LlamaModelOptionsExtended,
+): Promise<LlamaContext> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   return new mods[variant].LlamaContext(options)
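loadModel is only reformatted here; it still lazily loads the native module for the requested lib_variant and constructs a LlamaContext. A hedged usage sketch; the GGUF path is a placeholder:

// Sketch only: the model path is a placeholder, not a file shipped with the package.
import { loadModel } from '@fugood/llama.node'

const ctx = await loadModel({
  model: './models/example.gguf',
  lib_variant: 'vulkan', // 'default' | 'vulkan' | 'cuda'
})
console.log(ctx.getSystemInfo())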
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.9",
+  "version": "0.3.10",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
@@ -77,5 +77,12 @@
     "testMatch": [
       "**/*.test.ts"
     ]
+  },
+  "prettier": {
+    "trailingComma": "all",
+    "tabWidth": 2,
+    "semi": false,
+    "singleQuote": true,
+    "printWidth": 80
   }
 }
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -35,9 +35,10 @@ size_t findStoppingStrings(const std::string &text,
 LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
     Napi::Function callback, common_params params,
-    std::vector<std::string> stop_words
+    std::vector<std::string> stop_words,
+    int32_t chat_format)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
-      _params(params), _stop_words(stop_words) {
+      _params(params), _stop_words(stop_words), _chat_format(chat_format) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -152,15 +153,41 @@ void LlamaCompletionWorker::Execute() {
 }
 
 void LlamaCompletionWorker::OnOK() {
-  auto
-  result
+  auto env = Napi::AsyncWorker::Env();
+  auto result = Napi::Object::New(env);
+  result.Set("tokens_evaluated", Napi::Number::New(env,
                                                    _result.tokens_evaluated));
   result.Set("tokens_predicted", Napi::Number::New(Napi::AsyncWorker::Env(),
                                                    _result.tokens_predicted));
   result.Set("truncated",
-             Napi::Boolean::New(
+             Napi::Boolean::New(env, _result.truncated));
   result.Set("text",
-             Napi::String::New(
+             Napi::String::New(env, _result.text.c_str()));
+
+  Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
+  if (!_stop) {
+    try {
+      common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
+      for (size_t i = 0; i < message.tool_calls.size(); i++) {
+        const auto &tc = message.tool_calls[i];
+        Napi::Object tool_call = Napi::Object::New(env);
+        tool_call.Set("type", "function");
+        Napi::Object function = Napi::Object::New(env);
+        function.Set("name", tc.name);
+        function.Set("arguments", tc.arguments);
+        tool_call.Set("function", function);
+        if (!tc.id.empty()) {
+          tool_call.Set("id", tc.id);
+        }
+        tool_calls.Set(i, tool_call);
+      }
+    } catch (const std::exception &e) {
+      // console_log(env, "Error parsing tool calls: " + std::string(e.what()));
+    }
+  }
+  if (tool_calls.Length() > 0) {
+    result.Set("tool_calls", tool_calls);
+  }
 
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);

package/src/LlamaCompletionWorker.h
CHANGED

@@ -13,7 +13,8 @@ class LlamaCompletionWorker : public Napi::AsyncWorker,
 public:
   LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
                         Napi::Function callback, common_params params,
-                        std::vector<std::string> stop_words = {}
+                        std::vector<std::string> stop_words = {},
+                        int32_t chat_format = 0);
 
   ~LlamaCompletionWorker();
 
@@ -30,6 +31,7 @@ private:
   LlamaSessionPtr _sess;
   common_params _params;
   std::vector<std::string> _stop_words;
+  int32_t _chat_format;
   Napi::ThreadSafeFunction _tsfn;
   bool _has_callback = false;
   bool _stop = false;
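On the JavaScript side, the tool-call parsing added in OnOK surfaces as an optional tool_calls array on the completion result, with the { type, function: { name, arguments }, id? } shape built above. A hedged sketch of consuming it; whether the published TypeScript result type declares the field is not visible in this diff, so the access is typed loosely, and `ctx`, `messages`, and `tools` are assumed to exist:

// Sketch only: reads the `tool_calls` array attached by OnOK above.
const res = await ctx.completion({ jinja: true, messages, tools })
const toolCalls = (res as any).tool_calls as
  | { type: 'function'; id?: string; function: { name: string; arguments: string } }[]
  | undefined
if (toolCalls?.length) {
  for (const call of toolCalls) {
    // `arguments` is copied verbatim from the parsed tool call (a string, typically JSON)
    console.log(`tool requested: ${call.function.name}`, call.function.arguments)
  }
} else {
  console.log(res.text)
}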