@fugood/llama.node 0.3.9 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.js +2 -2
- package/lib/binding.ts +47 -8
- package/lib/index.js +21 -1
- package/lib/index.ts +31 -1
- package/package.json +12 -3
- package/src/LlamaCompletionWorker.cpp +33 -6
- package/src/LlamaCompletionWorker.h +3 -1
- package/src/LlamaContext.cpp +336 -28
- package/src/LlamaContext.h +2 -0
- package/src/common.hpp +19 -2
- package/src/llama.cpp/.github/workflows/build.yml +289 -107
- package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
- package/src/llama.cpp/.github/workflows/docker.yml +2 -1
- package/src/llama.cpp/.github/workflows/server.yml +25 -2
- package/src/llama.cpp/CMakeLists.txt +10 -19
- package/src/llama.cpp/cmake/build-info.cmake +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +32 -0
- package/src/llama.cpp/common/arg.cpp +66 -16
- package/src/llama.cpp/common/chat-template.hpp +515 -0
- package/src/llama.cpp/common/chat.cpp +966 -0
- package/src/llama.cpp/common/chat.hpp +52 -0
- package/src/llama.cpp/common/common.cpp +159 -36
- package/src/llama.cpp/common/common.h +56 -14
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
- package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
- package/src/llama.cpp/common/llguidance.cpp +270 -0
- package/src/llama.cpp/common/log.cpp +1 -10
- package/src/llama.cpp/common/log.h +10 -0
- package/src/llama.cpp/common/minja.hpp +2868 -0
- package/src/llama.cpp/common/sampling.cpp +22 -1
- package/src/llama.cpp/common/sampling.h +3 -0
- package/src/llama.cpp/docs/build.md +54 -9
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
- package/src/llama.cpp/examples/llava/clip.cpp +133 -14
- package/src/llama.cpp/examples/llava/clip.h +2 -0
- package/src/llama.cpp/examples/llava/llava.cpp +22 -8
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
- package/src/llama.cpp/examples/main/main.cpp +26 -25
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
- package/src/llama.cpp/examples/run/run.cpp +224 -69
- package/src/llama.cpp/examples/server/server.cpp +252 -81
- package/src/llama.cpp/examples/server/utils.hpp +73 -21
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
- package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +23 -13
- package/src/llama.cpp/include/llama.h +14 -1
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/src/llama-arch.cpp +7 -2
- package/src/llama.cpp/src/llama-arch.h +3 -1
- package/src/llama.cpp/src/llama-chat.cpp +11 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +86 -6
- package/src/llama.cpp/src/llama-grammar.h +22 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +76 -6
- package/src/llama.cpp/src/llama-sampling.cpp +47 -4
- package/src/llama.cpp/src/llama-vocab.cpp +10 -4
- package/src/llama.cpp/src/llama.cpp +181 -123
- package/src/llama.cpp/tests/CMakeLists.txt +4 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
- package/src/llama.cpp/tests/test-chat.cpp +607 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/bin/* (prebuilt llama-node.node and node.lib binaries listed above)
CHANGED
Binary files; no textual diff shown.

package/lib/binding.js
CHANGED

@@ -52,11 +52,11 @@ const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
     try {
         if (variant && variant !== 'default') {
             setupEnv(variant);
-            return yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+            return (yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
        }
     }
     catch (_a) { } // ignore errors and try the common path
     setupEnv();
-    return yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+    return (yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
 });
 exports.loadModule = loadModule;

package/lib/binding.ts
CHANGED

@@ -2,11 +2,12 @@ import * as path from 'path'
 
 export type ChatMessage = {
   role: string
-
+  content: string
 }
 
 export type LlamaModelOptions = {
   model: string
+  chat_template?: string
   embedding?: boolean
   embd_normalize?: number
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -42,10 +43,24 @@ export type LlamaModelOptions = {
   lora_list?: { path: string; scaled: number }[]
 }
 
+export type CompletionResponseFormat = {
+  type: 'text' | 'json_object' | 'json_schema'
+  json_schema?: {
+    strict?: boolean
+    schema: object
+  }
+  schema?: object // for json_object type
+}
+
 export type LlamaCompletionOptions = {
   messages?: ChatMessage[]
+  jinja?: boolean
+  chat_template?: string
+  response_format?: CompletionResponseFormat
+  tools?: object
+  parallel_tool_calls?: boolean
+  tool_choice?: string
   prompt?: string
-  n_samples?: number
   temperature?: number
   top_k?: number
   top_p?: number
@@ -70,6 +85,9 @@ export type LlamaCompletionOptions = {
   seed?: number
   stop?: string[]
   grammar?: string
+  grammar_lazy?: boolean
+  grammar_triggers?: { word: string; at_start: boolean }[]
+  preserved_tokens?: string[]
 }
 
 export type LlamaCompletionResult = {
@@ -105,8 +123,21 @@ export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
   getModelInfo(): object
-  getFormattedChat(
-
+  getFormattedChat(
+    messages: ChatMessage[],
+    chat_template?: string,
+    params?: {
+      jinja?: boolean
+      response_format?: CompletionResponseFormat
+      tools?: object
+      parallel_tool_calls?: object
+      tool_choice?: string
+    },
+  ): object | string
+  completion(
+    options: LlamaCompletionOptions,
+    callback?: (token: LlamaCompletionToken) => void,
+  ): Promise<LlamaCompletionResult>
   stopCompletion(): void
   tokenize(text: string): Promise<TokenizeResult>
   detokenize(tokens: number[]): Promise<string>
@@ -119,6 +150,7 @@ export interface LlamaContext {
   getLoadedLoraAdapters(): { path: string; scaled: number }[]
   // static
   loadModelInfo(path: string, skip: string[]): Promise<Object>
+  toggleNativeLog(enable: boolean, callback: (level: string, text: string) => void): void
 }
 
 export interface Module {
@@ -129,7 +161,10 @@ export type LibVariant = 'default' | 'vulkan' | 'cuda'
 
 const setupEnv = (variant?: string) => {
   const postfix = variant ? `-${variant}` : ''
-  const binPath = path.resolve(
+  const binPath = path.resolve(
+    __dirname,
+    `../bin/${process.platform}${postfix}/${process.arch}/`,
+  )
   const systemPathEnv = process.env.PATH ?? process.env.Path ?? ''
   if (!systemPathEnv.includes(binPath)) {
     if (process.platform === 'win32') {
@@ -144,9 +179,13 @@ export const loadModule = async (variant?: LibVariant): Promise<Module> => {
   try {
     if (variant && variant !== 'default') {
       setupEnv(variant)
-      return await import(
+      return (await import(
+        `../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`
+      )) as Module
     }
   } catch {} // ignore errors and try the common path
   setupEnv()
-  return await import(
-}
+  return (await import(
+    `../bin/${process.platform}/${process.arch}/llama-node.node`
+  )) as Module
+}

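Note: the new completion options above (jinja, tools, tool_choice, response_format, and the lazy-grammar fields) all flow through the existing completion() call on a loaded context. A minimal TypeScript sketch of how they might be used; the model path and tool schema are purely illustrative, not taken from this package:

import { loadModel } from '@fugood/llama.node'

const main = async () => {
  // Illustrative model path; any local GGUF file would do.
  const context = await loadModel({ model: './models/example.gguf' })

  const result = await context.completion({
    messages: [{ role: 'user', content: 'What is the weather in Tokyo?' }],
    jinja: true, // opt into the Jinja (minja) chat-template path added in this release
    tools: [
      {
        type: 'function',
        function: {
          name: 'get_weather', // hypothetical tool
          description: 'Look up current weather for a city',
          parameters: {
            type: 'object',
            properties: { city: { type: 'string' } },
            required: ['city'],
          },
        },
      },
    ],
    tool_choice: 'auto',
    temperature: 0.7,
  })

  console.log(result.text)
}

main()
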
package/lib/index.js
CHANGED

@@ -23,7 +23,8 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
     });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = void 0;
+exports.toggleNativeLog = exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = void 0;
+exports.addNativeLogListener = addNativeLogListener;
 const binding_1 = require("./binding");
 __exportStar(require("./binding"), exports);
 const mods = {};
@@ -49,3 +50,22 @@ const loadLlamaModelInfo = (path) => __awaiter(void 0, void 0, void 0, function*
     return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip);
 });
 exports.loadLlamaModelInfo = loadLlamaModelInfo;
+const logListeners = [];
+const logCallback = (level, text) => {
+    logListeners.forEach((listener) => listener(level, text));
+};
+const toggleNativeLog = (enable, options) => __awaiter(void 0, void 0, void 0, function* () {
+    var _a, _b;
+    const v = (_a = options === null || options === void 0 ? void 0 : options.variant) !== null && _a !== void 0 ? _a : 'default';
+    (_b = mods[v]) !== null && _b !== void 0 ? _b : (mods[v] = yield (0, binding_1.loadModule)(v));
+    return mods[v].LlamaContext.toggleNativeLog(enable, logCallback);
+});
+exports.toggleNativeLog = toggleNativeLog;
+function addNativeLogListener(listener) {
+    logListeners.push(listener);
+    return {
+        remove: () => {
+            logListeners.splice(logListeners.indexOf(listener), 1);
+        },
+    };
+}

package/lib/index.ts
CHANGED

@@ -9,7 +9,9 @@ export interface LlamaModelOptionsExtended extends LlamaModelOptions {
 
 const mods: { [key: string]: Module } = {}
 
-export const loadModel = async (
+export const loadModel = async (
+  options: LlamaModelOptionsExtended,
+): Promise<LlamaContext> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   return new mods[variant].LlamaContext(options)
@@ -30,3 +32,31 @@ export const loadLlamaModelInfo = async (path: string): Promise<Object> => {
   mods[variant] ??= await loadModule(variant)
   return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip)
 }
+
+const logListeners: Array<(level: string, text: string) => void> = []
+
+const logCallback = (level: string, text: string) => {
+  logListeners.forEach((listener) => listener(level, text))
+}
+
+export const toggleNativeLog = async (
+  enable: boolean,
+  options?: {
+    variant?: LibVariant
+  },
+) => {
+  const v = options?.variant ?? 'default'
+  mods[v] ??= await loadModule(v)
+  return mods[v].LlamaContext.toggleNativeLog(enable, logCallback)
+}
+
+export function addNativeLogListener(
+  listener: (level: string, text: string) => void,
+): { remove: () => void } {
+  logListeners.push(listener)
+  return {
+    remove: () => {
+      logListeners.splice(logListeners.indexOf(listener), 1)
+    },
+  }
+}

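The two log helpers exported here pair together: toggleNativeLog forwards llama.cpp's native logging into JS, and addNativeLogListener subscribes to that stream. A small usage sketch, assuming the default lib variant:

import { toggleNativeLog, addNativeLogListener } from '@fugood/llama.node'

const main = async () => {
  // Enable forwarding of native llama.cpp logs (loads the binding on first use).
  await toggleNativeLog(true)

  const listener = addNativeLogListener((level, text) => {
    console.log(`[llama.cpp][${level}] ${text.trimEnd()}`)
  })

  // ... load a model and run completions here ...

  // Detach the listener and silence native logging when done.
  listener.remove()
  await toggleNativeLog(false)
}

main()
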
package/package.json
CHANGED

@@ -1,8 +1,8 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.9",
-  "description": "
+  "version": "0.3.11",
+  "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
     "test": "jest",
@@ -22,7 +22,9 @@
     "llama",
     "llm",
     "ai",
-    "genai"
+    "genai",
+    "Local LLM",
+    "llama.cpp"
   ],
   "author": "Hans <hans.chen@bricks.tools>",
   "license": "MIT",
@@ -77,5 +79,12 @@
     "testMatch": [
       "**/*.test.ts"
     ]
+  },
+  "prettier": {
+    "trailingComma": "all",
+    "tabWidth": 2,
+    "semi": false,
+    "singleQuote": true,
+    "printWidth": 80
   }
 }

package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -35,9 +35,10 @@ size_t findStoppingStrings(const std::string &text,
 LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
     Napi::Function callback, common_params params,
-    std::vector<std::string> stop_words
+    std::vector<std::string> stop_words,
+    int32_t chat_format)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
-      _params(params), _stop_words(stop_words) {
+      _params(params), _stop_words(stop_words), _chat_format(chat_format) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -152,15 +153,41 @@ void LlamaCompletionWorker::Execute() {
 }
 
 void LlamaCompletionWorker::OnOK() {
-  auto
-  result
+  auto env = Napi::AsyncWorker::Env();
+  auto result = Napi::Object::New(env);
+  result.Set("tokens_evaluated", Napi::Number::New(env,
                                                    _result.tokens_evaluated));
   result.Set("tokens_predicted", Napi::Number::New(Napi::AsyncWorker::Env(),
                                                    _result.tokens_predicted));
   result.Set("truncated",
-             Napi::Boolean::New(
+             Napi::Boolean::New(env, _result.truncated));
   result.Set("text",
-             Napi::String::New(
+             Napi::String::New(env, _result.text.c_str()));
+
+  Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
+  if (!_stop) {
+    try {
+      common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
+      for (size_t i = 0; i < message.tool_calls.size(); i++) {
+        const auto &tc = message.tool_calls[i];
+        Napi::Object tool_call = Napi::Object::New(env);
+        tool_call.Set("type", "function");
+        Napi::Object function = Napi::Object::New(env);
+        function.Set("name", tc.name);
+        function.Set("arguments", tc.arguments);
+        tool_call.Set("function", function);
+        if (!tc.id.empty()) {
+          tool_call.Set("id", tc.id);
+        }
+        tool_calls.Set(i, tool_call);
+      }
+    } catch (const std::exception &e) {
+      // console_log(env, "Error parsing tool calls: " + std::string(e.what()));
+    }
+  }
+  if (tool_calls.Length() > 0) {
+    result.Set("tool_calls", tool_calls);
+  }
 
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);

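With the OnOK changes above, a completion result can now carry a tool_calls array built from common_chat_parse: each entry has type "function", a function object with a name and a raw arguments string, and an optional id. A hedged sketch of consuming that shape from JS; the TypeScript type below is inferred from the C++ above, not declared by the package:

// Shape inferred from LlamaCompletionWorker::OnOK; treat as a sketch.
type ToolCall = {
  type: 'function'
  function: { name: string; arguments: string }
  id?: string
}

const handleToolCalls = (result: { text: string; tool_calls?: ToolCall[] }) => {
  for (const call of result.tool_calls ?? []) {
    // `arguments` is the raw string produced by the model; parsing may fail.
    let args: unknown
    try {
      args = JSON.parse(call.function.arguments)
    } catch {
      args = call.function.arguments
    }
    console.log('tool call:', call.function.name, args)
  }
}
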
package/src/LlamaCompletionWorker.h
CHANGED

@@ -13,7 +13,8 @@ class LlamaCompletionWorker : public Napi::AsyncWorker,
 public:
   LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
                         Napi::Function callback, common_params params,
-                        std::vector<std::string> stop_words = {}
+                        std::vector<std::string> stop_words = {},
+                        int32_t chat_format = 0);
 
   ~LlamaCompletionWorker();
 
@@ -30,6 +31,7 @@ private:
   LlamaSessionPtr _sess;
   common_params _params;
   std::vector<std::string> _stop_words;
+  int32_t _chat_format;
   Napi::ThreadSafeFunction _tsfn;
   bool _has_callback = false;
   bool _stop = false;