@fugood/llama.node 0.3.8 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.js +2 -2
- package/lib/binding.ts +52 -8
- package/lib/index.ts +3 -1
- package/package.json +8 -1
- package/src/LlamaCompletionWorker.cpp +33 -6
- package/src/LlamaCompletionWorker.h +3 -1
- package/src/LlamaContext.cpp +387 -28
- package/src/LlamaContext.h +5 -0
- package/src/common.hpp +19 -2
- package/src/llama.cpp/.github/workflows/build.yml +289 -107
- package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
- package/src/llama.cpp/.github/workflows/docker.yml +2 -1
- package/src/llama.cpp/.github/workflows/server.yml +25 -2
- package/src/llama.cpp/CMakeLists.txt +10 -19
- package/src/llama.cpp/cmake/build-info.cmake +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +32 -0
- package/src/llama.cpp/common/arg.cpp +66 -16
- package/src/llama.cpp/common/chat-template.hpp +515 -0
- package/src/llama.cpp/common/chat.cpp +966 -0
- package/src/llama.cpp/common/chat.hpp +52 -0
- package/src/llama.cpp/common/common.cpp +159 -36
- package/src/llama.cpp/common/common.h +56 -14
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
- package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
- package/src/llama.cpp/common/llguidance.cpp +270 -0
- package/src/llama.cpp/common/log.cpp +1 -10
- package/src/llama.cpp/common/log.h +10 -0
- package/src/llama.cpp/common/minja.hpp +2868 -0
- package/src/llama.cpp/common/sampling.cpp +22 -1
- package/src/llama.cpp/common/sampling.h +3 -0
- package/src/llama.cpp/docs/build.md +54 -9
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
- package/src/llama.cpp/examples/llava/clip.cpp +133 -14
- package/src/llama.cpp/examples/llava/clip.h +2 -0
- package/src/llama.cpp/examples/llava/llava.cpp +22 -8
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
- package/src/llama.cpp/examples/main/main.cpp +26 -25
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
- package/src/llama.cpp/examples/run/run.cpp +224 -69
- package/src/llama.cpp/examples/server/server.cpp +252 -81
- package/src/llama.cpp/examples/server/utils.hpp +73 -21
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
- package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +23 -13
- package/src/llama.cpp/include/llama.h +14 -1
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/src/llama-arch.cpp +7 -2
- package/src/llama.cpp/src/llama-arch.h +3 -1
- package/src/llama.cpp/src/llama-chat.cpp +11 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +86 -6
- package/src/llama.cpp/src/llama-grammar.h +22 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +76 -6
- package/src/llama.cpp/src/llama-sampling.cpp +47 -4
- package/src/llama.cpp/src/llama-vocab.cpp +10 -4
- package/src/llama.cpp/src/llama.cpp +181 -123
- package/src/llama.cpp/tests/CMakeLists.txt +4 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
- package/src/llama.cpp/tests/test-chat.cpp +607 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/bin/** (prebuilt binaries listed above: llama-node.node and node.lib for each platform/arch)
CHANGED
Binary files (contents not shown in the diff)
package/lib/binding.js
CHANGED
@@ -52,11 +52,11 @@ const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
     try {
         if (variant && variant !== 'default') {
             setupEnv(variant);
-            return yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+            return (yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
         }
     }
     catch (_a) { } // ignore errors and try the common path
     setupEnv();
-    return yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+    return (yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
 });
 exports.loadModule = loadModule;
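Illustrative usage (a sketch, not part of the published files): the loader above picks the prebuilt binary by platform, optional variant, and CPU architecture, e.g. bin/linux-vulkan/x64/llama-node.node for the 'vulkan' variant on Linux x64, falling back to bin/linux/x64/llama-node.node. Assuming the lib/binding entry point is importable:

  import { loadModule } from '@fugood/llama.node/lib/binding'

  // Tries bin/<platform>-vulkan/<arch>/llama-node.node first, then bin/<platform>/<arch>/.
  const mod = await loadModule('vulkan') // LibVariant: 'default' | 'vulkan' | 'cuda'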
package/lib/binding.ts
CHANGED
@@ -2,11 +2,12 @@ import * as path from 'path'
 
 export type ChatMessage = {
   role: string
-
+  content: string
 }
 
 export type LlamaModelOptions = {
   model: string
+  chat_template?: string
   embedding?: boolean
   embd_normalize?: number
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -37,12 +38,29 @@ export type LlamaModelOptions = {
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
+  lora?: string
+  lora_scaled?: number
+  lora_list?: { path: string; scaled: number }[]
+}
+
+export type CompletionResponseFormat = {
+  type: 'text' | 'json_object' | 'json_schema'
+  json_schema?: {
+    strict?: boolean
+    schema: object
+  }
+  schema?: object // for json_object type
 }
 
 export type LlamaCompletionOptions = {
   messages?: ChatMessage[]
+  jinja?: boolean
+  chat_template?: string
+  response_format?: CompletionResponseFormat
+  tools?: object
+  parallel_tool_calls?: boolean
+  tool_choice?: string
   prompt?: string
-  n_samples?: number
   temperature?: number
   top_k?: number
   top_p?: number
@@ -67,6 +85,9 @@ export type LlamaCompletionOptions = {
   seed?: number
   stop?: string[]
   grammar?: string
+  grammar_lazy?: boolean
+  grammar_triggers?: { word: string; at_start: boolean }[]
+  preserved_tokens?: string[]
 }
 
 export type LlamaCompletionResult = {
@@ -102,8 +123,21 @@ export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
   getModelInfo(): object
-  getFormattedChat(
-
+  getFormattedChat(
+    messages: ChatMessage[],
+    chat_template?: string,
+    params?: {
+      jinja?: boolean
+      response_format?: CompletionResponseFormat
+      tools?: object
+      parallel_tool_calls?: object
+      tool_choice?: string
+    },
+  ): object | string
+  completion(
+    options: LlamaCompletionOptions,
+    callback?: (token: LlamaCompletionToken) => void,
+  ): Promise<LlamaCompletionResult>
   stopCompletion(): void
   tokenize(text: string): Promise<TokenizeResult>
   detokenize(tokens: number[]): Promise<string>
@@ -111,6 +145,9 @@ export interface LlamaContext {
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
   release(): Promise<void>
+  applyLoraAdapters(adapters: { path: string; scaled: number }[]): void
+  removeLoraAdapters(adapters: { path: string }[]): void
+  getLoadedLoraAdapters(): { path: string; scaled: number }[]
   // static
   loadModelInfo(path: string, skip: string[]): Promise<Object>
 }
@@ -123,7 +160,10 @@ export type LibVariant = 'default' | 'vulkan' | 'cuda'
 
 const setupEnv = (variant?: string) => {
   const postfix = variant ? `-${variant}` : ''
-  const binPath = path.resolve(
+  const binPath = path.resolve(
+    __dirname,
+    `../bin/${process.platform}${postfix}/${process.arch}/`,
+  )
   const systemPathEnv = process.env.PATH ?? process.env.Path ?? ''
   if (!systemPathEnv.includes(binPath)) {
     if (process.platform === 'win32') {
@@ -138,9 +178,13 @@ export const loadModule = async (variant?: LibVariant): Promise<Module> => {
   try {
     if (variant && variant !== 'default') {
       setupEnv(variant)
-      return await import(
+      return (await import(
+        `../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`
+      )) as Module
     }
   } catch {} // ignore errors and try the common path
   setupEnv()
-  return await import(
-}
+  return (await import(
+    `../bin/${process.platform}/${process.arch}/llama-node.node`
+  )) as Module
+}
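The new completion-side fields line up with llama.cpp's chat template and tool-calling support. A minimal sketch of how they compose (the model path, tool definition, and option values below are placeholders, not taken from the diff):

  import { loadModel } from '@fugood/llama.node'

  const ctx = await loadModel({ model: './model.gguf' })
  const res = await ctx.completion({
    jinja: true, // render the prompt with the model's Jinja chat template
    messages: [{ role: 'user', content: 'What is the weather in Tokyo?' }],
    tools: [
      {
        type: 'function',
        function: {
          name: 'get_weather',
          parameters: { type: 'object', properties: { city: { type: 'string' } } },
        },
      },
    ],
    tool_choice: 'auto',
    // response_format: { type: 'json_object' } is the structured-output alternative
  })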
package/lib/index.ts
CHANGED
@@ -9,7 +9,9 @@ export interface LlamaModelOptionsExtended extends LlamaModelOptions {
 
 const mods: { [key: string]: Module } = {}
 
-export const loadModel = async (
+export const loadModel = async (
+  options: LlamaModelOptionsExtended,
+): Promise<LlamaContext> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   return new mods[variant].LlamaContext(options)
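Illustrative use of the loader together with the variant and LoRA-related options added in binding.ts (file paths and scale values are placeholders):

  import { loadModel } from '@fugood/llama.node'

  const ctx = await loadModel({
    model: './base-model.gguf',
    lib_variant: 'vulkan', // 'default' | 'vulkan' | 'cuda'
    lora_list: [{ path: './adapter.gguf', scaled: 1.0 }], // applied at load time
  })

  // Adapters can also be managed after load:
  ctx.applyLoraAdapters([{ path: './style.gguf', scaled: 0.5 }])
  console.log(ctx.getLoadedLoraAdapters())
  ctx.removeLoraAdapters([{ path: './style.gguf' }])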
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.8",
+  "version": "0.3.10",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
@@ -77,5 +77,12 @@
     "testMatch": [
       "**/*.test.ts"
     ]
+  },
+  "prettier": {
+    "trailingComma": "all",
+    "tabWidth": 2,
+    "semi": false,
+    "singleQuote": true,
+    "printWidth": 80
   }
 }
package/src/LlamaCompletionWorker.cpp
CHANGED
@@ -35,9 +35,10 @@ size_t findStoppingStrings(const std::string &text,
 LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
     Napi::Function callback, common_params params,
-    std::vector<std::string> stop_words
+    std::vector<std::string> stop_words,
+    int32_t chat_format)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
-      _params(params), _stop_words(stop_words) {
+      _params(params), _stop_words(stop_words), _chat_format(chat_format) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -152,15 +153,41 @@ void LlamaCompletionWorker::Execute() {
 }
 
 void LlamaCompletionWorker::OnOK() {
-  auto
-  result
+  auto env = Napi::AsyncWorker::Env();
+  auto result = Napi::Object::New(env);
+  result.Set("tokens_evaluated", Napi::Number::New(env,
                                                    _result.tokens_evaluated));
   result.Set("tokens_predicted", Napi::Number::New(Napi::AsyncWorker::Env(),
                                                    _result.tokens_predicted));
   result.Set("truncated",
-             Napi::Boolean::New(
+             Napi::Boolean::New(env, _result.truncated));
   result.Set("text",
-             Napi::String::New(
+             Napi::String::New(env, _result.text.c_str()));
+
+  Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
+  if (!_stop) {
+    try {
+      common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
+      for (size_t i = 0; i < message.tool_calls.size(); i++) {
+        const auto &tc = message.tool_calls[i];
+        Napi::Object tool_call = Napi::Object::New(env);
+        tool_call.Set("type", "function");
+        Napi::Object function = Napi::Object::New(env);
+        function.Set("name", tc.name);
+        function.Set("arguments", tc.arguments);
+        tool_call.Set("function", function);
+        if (!tc.id.empty()) {
+          tool_call.Set("id", tc.id);
+        }
+        tool_calls.Set(i, tool_call);
+      }
+    } catch (const std::exception &e) {
+      // console_log(env, "Error parsing tool calls: " + std::string(e.what()));
+    }
+  }
+  if (tool_calls.Length() > 0) {
+    result.Set("tool_calls", tool_calls);
+  }
 
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
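The OnOK changes above attach any tool calls parsed from the generated text to the JavaScript result object. An illustrative shape of that result as seen from Node.js (values are placeholders; the timing fields set later in OnOK are omitted):

  const result = {
    tokens_evaluated: 42,
    tokens_predicted: 17,
    truncated: false,
    text: '...raw model output...',
    tool_calls: [
      {
        type: 'function',
        function: { name: 'get_weather', arguments: '{"city":"Tokyo"}' },
        id: 'call_0', // present only when the parser supplies an id
      },
    ],
  }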
package/src/LlamaCompletionWorker.h
CHANGED
@@ -13,7 +13,8 @@ class LlamaCompletionWorker : public Napi::AsyncWorker,
 public:
   LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
                         Napi::Function callback, common_params params,
-                        std::vector<std::string> stop_words = {}
+                        std::vector<std::string> stop_words = {},
+                        int32_t chat_format = 0);
 
   ~LlamaCompletionWorker();
 
@@ -30,6 +31,7 @@ private:
   LlamaSessionPtr _sess;
   common_params _params;
   std::vector<std::string> _stop_words;
+  int32_t _chat_format;
   Napi::ThreadSafeFunction _tsfn;
   bool _has_callback = false;
   bool _stop = false;
|