@fugood/llama.node 0.3.8 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.js +2 -2
- package/lib/binding.ts +52 -8
- package/lib/index.ts +3 -1
- package/package.json +8 -1
- package/src/LlamaCompletionWorker.cpp +33 -6
- package/src/LlamaCompletionWorker.h +3 -1
- package/src/LlamaContext.cpp +387 -28
- package/src/LlamaContext.h +5 -0
- package/src/common.hpp +19 -2
- package/src/llama.cpp/.github/workflows/build.yml +289 -107
- package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
- package/src/llama.cpp/.github/workflows/docker.yml +2 -1
- package/src/llama.cpp/.github/workflows/server.yml +25 -2
- package/src/llama.cpp/CMakeLists.txt +10 -19
- package/src/llama.cpp/cmake/build-info.cmake +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +32 -0
- package/src/llama.cpp/common/arg.cpp +66 -16
- package/src/llama.cpp/common/chat-template.hpp +515 -0
- package/src/llama.cpp/common/chat.cpp +966 -0
- package/src/llama.cpp/common/chat.hpp +52 -0
- package/src/llama.cpp/common/common.cpp +159 -36
- package/src/llama.cpp/common/common.h +56 -14
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
- package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
- package/src/llama.cpp/common/llguidance.cpp +270 -0
- package/src/llama.cpp/common/log.cpp +1 -10
- package/src/llama.cpp/common/log.h +10 -0
- package/src/llama.cpp/common/minja.hpp +2868 -0
- package/src/llama.cpp/common/sampling.cpp +22 -1
- package/src/llama.cpp/common/sampling.h +3 -0
- package/src/llama.cpp/docs/build.md +54 -9
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
- package/src/llama.cpp/examples/llava/clip.cpp +133 -14
- package/src/llama.cpp/examples/llava/clip.h +2 -0
- package/src/llama.cpp/examples/llava/llava.cpp +22 -8
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
- package/src/llama.cpp/examples/main/main.cpp +26 -25
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
- package/src/llama.cpp/examples/run/run.cpp +224 -69
- package/src/llama.cpp/examples/server/server.cpp +252 -81
- package/src/llama.cpp/examples/server/utils.hpp +73 -21
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
- package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +23 -13
- package/src/llama.cpp/include/llama.h +14 -1
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/src/llama-arch.cpp +7 -2
- package/src/llama.cpp/src/llama-arch.h +3 -1
- package/src/llama.cpp/src/llama-chat.cpp +11 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +86 -6
- package/src/llama.cpp/src/llama-grammar.h +22 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +76 -6
- package/src/llama.cpp/src/llama-sampling.cpp +47 -4
- package/src/llama.cpp/src/llama-vocab.cpp +10 -4
- package/src/llama.cpp/src/llama.cpp +181 -123
- package/src/llama.cpp/tests/CMakeLists.txt +4 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
- package/src/llama.cpp/tests/test-chat.cpp +607 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/src/LlamaContext.cpp
CHANGED
@@ -1,6 +1,8 @@
 #include "ggml.h"
 #include "gguf.h"
 #include "llama-impl.h"
+#include "json.hpp"
+#include "json-schema-to-grammar.h"
 #include "LlamaContext.h"
 #include "DetokenizeWorker.h"
 #include "DisposeWorker.h"
@@ -10,6 +12,8 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
+using json = nlohmann::ordered_json;
+
 // loadModelInfo(path: string): object
 Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
   Napi::Env env = info.Env();
@@ -103,6 +107,15 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
           InstanceMethod<&LlamaContext::LoadSession>(
               "loadSession",
               static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::ApplyLoraAdapters>(
+              "applyLoraAdapters",
+              static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::RemoveLoraAdapters>(
+              "removeLoraAdapters",
+              static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::GetLoadedLoraAdapters>(
+              "getLoadedLoraAdapters",
+              static_cast<napi_property_attributes>(napi_enumerable)),
           InstanceMethod<&LlamaContext::Release>(
               "release", static_cast<napi_property_attributes>(napi_enumerable)),
           StaticMethod<&LlamaContext::ModelInfo>(
@@ -167,6 +180,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     params.warmup = false;
   }
 
+  params.chat_template = get_option<std::string>(options, "chat_template", "");
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -202,8 +217,52 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
         .ThrowAsJavaScriptException();
   }
 
+  auto ctx = sess->context();
+  auto model = sess->model();
+
+  std::vector<common_adapter_lora_info> lora;
+  auto lora_path = get_option<std::string>(options, "lora", "");
+  auto lora_scaled = get_option<float>(options, "lora_scaled", 1.0f);
+  if (lora_path != "") {
+    common_adapter_lora_info la;
+    la.path = lora_path;
+    la.scale = lora_scaled;
+    la.ptr = llama_adapter_lora_init(model, lora_path.c_str());
+    if (la.ptr == nullptr) {
+      Napi::TypeError::New(env, "Failed to load lora adapter")
+          .ThrowAsJavaScriptException();
+    }
+    lora.push_back(la);
+  }
+
+  if (options.Has("lora_list") && options.Get("lora_list").IsArray()) {
+    auto lora_list = options.Get("lora_list").As<Napi::Array>();
+    if (lora_list != nullptr) {
+      int lora_list_size = lora_list.Length();
+      for (int i = 0; i < lora_list_size; i++) {
+        auto lora_adapter = lora_list.Get(i).As<Napi::Object>();
+        auto path = lora_adapter.Get("path").ToString();
+        if (path != nullptr) {
+          common_adapter_lora_info la;
+          la.path = path;
+          la.scale = lora_adapter.Get("scaled").ToNumber().FloatValue();
+          la.ptr = llama_adapter_lora_init(model, path.Utf8Value().c_str());
+          if (la.ptr == nullptr) {
+            Napi::TypeError::New(env, "Failed to load lora adapter")
+                .ThrowAsJavaScriptException();
+          }
+          lora.push_back(la);
+        }
+      }
+    }
+  }
+  common_set_adapter_lora(ctx, lora);
+  _lora = lora;
+
   _sess = sess;
   _info = common_params_get_system_info(params);
+
+  _templates = common_chat_templates_from_model(model, params.chat_template);
 }
 
 // getSystemInfo(): string
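The constructor can now attach LoRA adapters at load time, via either a single lora path plus lora_scaled, or a lora_list array. A minimal TypeScript sketch, assuming the package's loadModel entry point from lib/index.ts and placeholder file paths:

import { loadModel } from '@fugood/llama.node'

// Sketch only: paths are hypothetical; option names mirror the hunk above.
const context = await loadModel({
  model: './model.gguf',
  // a single adapter with its scale...
  lora: './adapter.gguf',
  lora_scaled: 0.8,
  // ...or several adapters at once:
  lora_list: [{ path: './adapter.gguf', scaled: 0.8 }],
})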
@@ -211,17 +270,12 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
   return Napi::String::New(info.Env(), _info);
 }
 
-bool validateModelChatTemplate(const struct llama_model * model) {
-
-
-
-
-
-  const char * tmpl = llama_model_chat_template(model);
-  int32_t chat_res = llama_chat_apply_template(tmpl, chat, 1, true, nullptr, 0);
-  return chat_res > 0;
-}
-  return res > 0;
+bool validateModelChatTemplate(const struct llama_model * model, const bool use_jinja, const char * name) {
+  const char * tmpl = llama_model_chat_template(model, name);
+  if (tmpl == nullptr) {
+    return false;
+  }
+  return common_chat_verify_template(tmpl, use_jinja);
 }
 
 // getModelInfo(): object
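The reworked validateModelChatTemplate(model, use_jinja, name) now answers per-template questions (legacy llama.cpp path vs. minja/jinja, optionally for a named "tool_use" variant) and feeds the chatTemplates report that getModelInfo() gains in the next hunk. A sketch of reading that report from TypeScript; field names are taken from the diff, and context is assumed to be a loaded LlamaContext:

const info = context.getModelInfo()
// Legacy llama_chat_apply_template support:
console.log(info.chatTemplates.llamaChat)
// Jinja (minja) template validity and capabilities:
console.log(info.chatTemplates.minja.default)
console.log(info.chatTemplates.minja.defaultCaps) // { tools, toolCalls, toolResponses, systemRole, parallelToolCalls, toolCallId }
// Dedicated "tool_use" template, when the model ships one:
console.log(info.chatTemplates.minja.toolUse)     // { toolUse, toolUseCaps? }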
@@ -235,29 +289,204 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   for (int i = 0; i < count; i++) {
     char key[256];
     llama_model_meta_key_by_index(model, i, key, sizeof(key));
-    char val[
+    char val[4096];
     llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
 
     metadata.Set(key, val);
   }
   Napi::Object details = Napi::Object::New(info.Env());
   details.Set("desc", desc);
+  details.Set("nEmbd", llama_model_n_embd(model));
   details.Set("nParams", llama_model_n_params(model));
   details.Set("size", llama_model_size(model));
-
+
+  Napi::Object chatTemplates = Napi::Object::New(info.Env());
+  chatTemplates.Set("llamaChat", validateModelChatTemplate(model, false, ""));
+  Napi::Object minja = Napi::Object::New(info.Env());
+  minja.Set("default", validateModelChatTemplate(model, true, ""));
+  Napi::Object defaultCaps = Napi::Object::New(info.Env());
+  defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
+  defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
+  defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
+  defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
+  defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
+  defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
+  minja.Set("defaultCaps", defaultCaps);
+  Napi::Object toolUse = Napi::Object::New(info.Env());
+  toolUse.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
+  if (_templates.template_tool_use) {
+    Napi::Object toolUseCaps = Napi::Object::New(info.Env());
+    toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
+    toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
+    toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
+    toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
+    toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
+    toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
+    toolUse.Set("toolUseCaps", toolUseCaps);
+  }
+  minja.Set("toolUse", toolUse);
+  chatTemplates.Set("minja", minja);
+  details.Set("chatTemplates", chatTemplates);
+
   details.Set("metadata", metadata);
   return details;
 }
 
-
+common_chat_params getFormattedChatWithJinja(
+    const struct llama_model * model,
+    const common_chat_templates &templates,
+    const std::string &messages,
+    const std::string &chat_template,
+    const std::string &json_schema,
+    const std::string &tools,
+    const bool &parallel_tool_calls,
+    const std::string &tool_choice
+) {
+  common_chat_inputs inputs;
+  inputs.messages = json::parse(messages);
+  auto useTools = !tools.empty();
+  if (useTools) {
+    inputs.tools = json::parse(tools);
+  }
+  inputs.parallel_tool_calls = parallel_tool_calls;
+  if (!tool_choice.empty()) {
+    inputs.tool_choice = tool_choice;
+  }
+  if (!json_schema.empty()) {
+    inputs.json_schema = json::parse(json_schema);
+  }
+  inputs.stream = true;
+
+  // If chat_template is provided, create new one and use it (probably slow)
+  if (!chat_template.empty()) {
+    auto tmp = common_chat_templates_from_model(model, chat_template);
+    const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
+    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
+      inputs.parallel_tool_calls = false;
+    }
+    return common_chat_params_init(*template_ptr, inputs);
+  } else {
+    const common_chat_template* template_ptr = useTools && templates.template_tool_use ? templates.template_tool_use.get() : templates.template_default.get();
+    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
+      inputs.parallel_tool_calls = false;
+    }
+    return common_chat_params_init(*template_ptr, inputs);
+  }
+}
+
+std::string getFormattedChat(
+    const struct llama_model * model,
+    const common_chat_templates &templates,
+    const std::string &messages,
+    const std::string &chat_template
+) {
+  auto chat_json = json::parse(messages);
+
+  // Handle regular chat without tools
+  std::vector<common_chat_msg> chat_msgs;
+  for (const auto &msg : chat_json) {
+    chat_msgs.push_back({
+      msg["role"].get<std::string>(),
+      msg["content"].get<std::string>()
+    });
+  }
+
+  // If chat_template is provided, create new one and use it (probably slow)
+  if (!chat_template.empty()) {
+    auto tmp = common_chat_templates_from_model(model, chat_template);
+    return common_chat_apply_template(
+      *tmp.template_default,
+      chat_msgs,
+      true,
+      false
+    );
+  } else {
+    return common_chat_apply_template(
+      *templates.template_default,
+      chat_msgs,
+      true,
+      false
+    );
+  }
+}
+
+// getFormattedChat(
+//   messages: [{ role: string, content: string }],
+//   chat_template: string,
+//   params: { jinja: boolean, json_schema: string, tools: string, parallel_tool_calls: boolean, tool_choice: string }
+// ): object | string
 Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
   if (info.Length() < 1 || !info[0].IsArray()) {
     Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
   }
-  auto messages = info[0].As<Napi::Array>();
-
-
+  auto messages = json_stringify(info[0].As<Napi::Array>());
+  printf("messages: %s\n", messages.c_str());
+  auto chat_template = info[1].IsString() ? info[1].ToString().Utf8Value() : "";
+
+  auto has_params = info.Length() >= 2;
+  auto params = has_params ? info[2].As<Napi::Object>() : Napi::Object::New(env);
+
+  if (get_option<bool>(params, "jinja", false)) {
+    std::string json_schema_str = "";
+    if (!is_nil(params.Get("response_format"))) {
+      auto response_format = params.Get("response_format").As<Napi::Object>();
+      auto response_format_type = get_option<std::string>(response_format, "type", "text");
+      if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
+        auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
+        json_schema_str = json_schema.Has("schema") ?
+          json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
+          "{}";
+      } else if (response_format_type == "json_object") {
+        json_schema_str = response_format.Has("schema") ?
+          json_stringify(response_format.Get("schema").As<Napi::Object>()) :
+          "{}";
+      }
+    }
+    auto tools_str = params.Has("tools") ?
+      json_stringify(params.Get("tools").As<Napi::Array>()) :
+      "";
+    auto parallel_tool_calls = get_option<bool>(params, "parallel_tool_calls", false);
+    auto tool_choice = get_option<std::string>(params, "tool_choice", "");
+
+    auto chatParams = getFormattedChatWithJinja(_sess->model(), _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
+
+    Napi::Object result = Napi::Object::New(env);
+    result.Set("prompt", chatParams.prompt.get<std::string>());
+    // chat_format: int
+    result.Set("chat_format", static_cast<int>(chatParams.format));
+    // grammar: string
+    result.Set("grammar", chatParams.grammar);
+    // grammar_lazy: boolean
+    result.Set("grammea_lazy", chatParams.grammar_lazy);
+    // grammar_triggers: [{ word: string, at_start: boolean }]
+    Napi::Array grammar_triggers = Napi::Array::New(env);
+    for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
+      const auto & trigger = chatParams.grammar_triggers[i];
+      Napi::Object triggerObj = Napi::Object::New(env);
+      triggerObj.Set("word", Napi::String::New(env, trigger.word.c_str()));
+      triggerObj.Set("at_start", Napi::Boolean::New(env, trigger.at_start));
+      grammar_triggers.Set(i, triggerObj);
+    }
+    result.Set("grammar_triggers", grammar_triggers);
+    // preserved_tokens: string[]
+    Napi::Array preserved_tokens = Napi::Array::New(env);
+    for (size_t i = 0; i < chatParams.preserved_tokens.size(); i++) {
+      preserved_tokens.Set(i, Napi::String::New(env, chatParams.preserved_tokens[i].c_str()));
+    }
+    result.Set("preserved_tokens", preserved_tokens);
+    // additional_stops: string[]
+    Napi::Array additional_stops = Napi::Array::New(env);
+    for (size_t i = 0; i < chatParams.additional_stops.size(); i++) {
+      additional_stops.Set(i, Napi::String::New(env, chatParams.additional_stops[i].c_str()));
+    }
+    result.Set("additional_stops", additional_stops);
+
+    return result;
+  } else {
+    auto formatted = getFormattedChat(_sess->model(), _templates, messages, chat_template);
+    return Napi::String::New(env, formatted);
+  }
 }
 
 // completion(options: LlamaCompletionOptions, onToken?: (token: string) =>
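From the JS side, getFormattedChat keeps returning a plain prompt string by default, but with jinja: true it returns the structured object assembled above. A hedged sketch following the signature comment in the source (the grammea_lazy key is spelled exactly as in the shipped code; 'auto' for tool_choice is a hypothetical value):

const result = context.getFormattedChat(
  [{ role: 'user', content: 'What is the weather in Tokyo?' }],
  undefined, // chat_template: fall back to the model's own template
  {
    jinja: true,
    tools: [], // OpenAI-style tool definitions would go here
    parallel_tool_calls: false,
    tool_choice: 'auto',
    response_format: { type: 'json_object' },
  },
)
// result: { prompt, chat_format, grammar, grammea_lazy,
//           grammar_triggers, preserved_tokens, additional_stops }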
@@ -280,11 +509,101 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
   auto options = info[0].As<Napi::Object>();
 
+  std::vector<std::string> stop_words;
+  if (options.Has("stop") && options.Get("stop").IsArray()) {
+    auto stop_words_array = options.Get("stop").As<Napi::Array>();
+    for (size_t i = 0; i < stop_words_array.Length(); i++) {
+      stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
+    }
+  }
+
+  int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
+
   common_params params = _sess->params();
+  auto grammar_from_params = get_option<std::string>(options, "grammar", "");
+  auto has_grammar_set = !grammar_from_params.empty();
+  if (has_grammar_set) {
+    params.sampling.grammar = grammar_from_params;
+  }
+
+  std::string json_schema_str = "";
+  if (options.Has("response_format")) {
+    auto response_format = options.Get("response_format").As<Napi::Object>();
+    auto response_format_type = get_option<std::string>(response_format, "type", "text");
+    if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
+      auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
+      json_schema_str = json_schema.Has("schema") ?
+        json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
+        "{}";
+    } else if (response_format_type == "json_object") {
+      json_schema_str = response_format.Has("schema") ?
+        json_stringify(response_format.Get("schema").As<Napi::Object>()) :
+        "{}";
+    }
+  }
+
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
-    auto
-
+    auto chat_template = get_option<std::string>(options, "chat_template", "");
+    auto jinja = get_option<bool>(options, "jinja", false);
+    if (jinja) {
+      auto tools_str = options.Has("tools") ?
+        json_stringify(options.Get("tools").As<Napi::Array>()) :
+        "";
+      auto parallel_tool_calls = get_option<bool>(options, "parallel_tool_calls", false);
+      auto tool_choice = get_option<std::string>(options, "tool_choice", "none");
+
+      auto chatParams = getFormattedChatWithJinja(
+        _sess->model(),
+        _templates,
+        json_stringify(messages),
+        chat_template,
+        json_schema_str,
+        tools_str,
+        parallel_tool_calls,
+        tool_choice
+      );
+
+      params.prompt = chatParams.prompt.get<std::string>();
+
+      chat_format = chatParams.format;
+
+      if (!has_grammar_set) {
+        // grammar param always wins jinja template & json_schema
+        params.sampling.grammar = chatParams.grammar;
+        params.sampling.grammar_lazy = chatParams.grammar_lazy;
+
+        for (const auto & trigger : chatParams.grammar_triggers) {
+          auto ids = common_tokenize(_sess->context(), trigger.word, /* add_special= */ false, /* parse_special= */ true);
+          if (ids.size() == 1) {
+            params.sampling.grammar_trigger_tokens.push_back(ids[0]);
+            params.sampling.preserved_tokens.insert(ids[0]);
+            continue;
+          }
+          params.sampling.grammar_trigger_words.push_back(trigger);
+        }
+        has_grammar_set = true;
+      }
+
+      for (const auto & token : chatParams.preserved_tokens) {
+        auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+        if (ids.size() == 1) {
+          params.sampling.preserved_tokens.insert(ids[0]);
+        }
+      }
+
+      for (const auto & stop : chatParams.additional_stops) {
+        stop_words.push_back(stop);
+      }
+    } else {
+      auto formatted = getFormattedChat(
+        _sess->model(),
+        _templates,
+        json_stringify(messages),
+        chat_template
+      );
+      params.prompt = formatted;
+    }
   } else {
     params.prompt = get_option<std::string>(options, "prompt", "");
   }
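Taken together, the Completion changes let a caller pass chat messages and have the jinja path derive the prompt, grammar, trigger tokens, and extra stop words in one shot. A sketch under the same assumptions as the earlier examples; note that an explicit grammar option always wins, and response_format is converted to a grammar only when no other grammar was set:

const res = await context.completion(
  {
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: 'Answer in JSON.' },
    ],
    jinja: true,
    response_format: {
      type: 'json_schema',
      json_schema: { schema: { type: 'object' } },
    },
    n_predict: 128,
    stop: ['<|im_end|>'], // merged with the template's additional_stops
  },
  (token) => process.stdout.write(token), // per the onToken comment above
)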
@@ -292,6 +611,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Prompt is required")
         .ThrowAsJavaScriptException();
   }
+
+  if (!has_grammar_set && !json_schema_str.empty()) {
+    params.sampling.grammar = json_schema_to_grammar(json::parse(json_schema_str));
+  }
+
   params.n_predict = get_option<int32_t>(options, "n_predict", -1);
   params.sampling.temp = get_option<float>(options, "temperature", 0.80f);
   params.sampling.top_k = get_option<int32_t>(options, "top_k", 40);
@@ -318,16 +642,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
   params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
   params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
-  params.sampling.grammar = get_option<std::string>(options, "grammar", "");
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
   params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
-  std::vector<std::string> stop_words;
-  if (options.Has("stop") && options.Get("stop").IsArray()) {
-    auto stop_words_array = options.Get("stop").As<Napi::Array>();
-    for (size_t i = 0; i < stop_words_array.Length(); i++) {
-      stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
-    }
-  }
 
   Napi::Function callback;
   if (info.Length() >= 2) {
@@ -335,7 +651,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   auto *worker =
-      new LlamaCompletionWorker(info, _sess, callback, params, stop_words);
+      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format);
   worker->Queue();
   _wip = worker;
   worker->onComplete([this]() { _wip = nullptr; });
@@ -451,6 +767,49 @@ Napi::Value LlamaContext::LoadSession(const Napi::CallbackInfo &info) {
   return worker->Promise();
 }
 
+// applyLoraAdapters(lora_adapters: [{ path: string, scaled: number }]): void
+void LlamaContext::ApplyLoraAdapters(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  std::vector<common_adapter_lora_info> lora;
+  auto lora_adapters = info[0].As<Napi::Array>();
+  for (size_t i = 0; i < lora_adapters.Length(); i++) {
+    auto lora_adapter = lora_adapters.Get(i).As<Napi::Object>();
+    auto path = lora_adapter.Get("path").ToString().Utf8Value();
+    auto scaled = lora_adapter.Get("scaled").ToNumber().FloatValue();
+    common_adapter_lora_info la;
+    la.path = path;
+    la.scale = scaled;
+    la.ptr = llama_adapter_lora_init(_sess->model(), path.c_str());
+    if (la.ptr == nullptr) {
+      Napi::TypeError::New(env, "Failed to load lora adapter")
+          .ThrowAsJavaScriptException();
+    }
+    lora.push_back(la);
+  }
+  common_set_adapter_lora(_sess->context(), lora);
+  _lora = lora;
+}
+
+// removeLoraAdapters(): void
+void LlamaContext::RemoveLoraAdapters(const Napi::CallbackInfo &info) {
+  _lora.clear();
+  common_set_adapter_lora(_sess->context(), _lora);
+}
+
+// getLoadedLoraAdapters(): Promise<{ count, lora_adapters: [{ path: string,
+// scaled: number }] }>
+Napi::Value LlamaContext::GetLoadedLoraAdapters(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  Napi::Array lora_adapters = Napi::Array::New(env, _lora.size());
+  for (size_t i = 0; i < _lora.size(); i++) {
+    Napi::Object lora_adapter = Napi::Object::New(env);
+    lora_adapter.Set("path", _lora[i].path);
+    lora_adapter.Set("scaled", _lora[i].scale);
+    lora_adapters.Set(i, lora_adapter);
+  }
+  return lora_adapters;
+}
+
 // release(): Promise<void>
 Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
   auto env = info.Env();
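The three new instance methods expose runtime adapter control, so adapters can be swapped without reloading the model. A sketch against an already-loaded context; note the comment in the source advertises a Promise for getLoadedLoraAdapters, but the body returns the array synchronously:

// Hot-swap adapters on a live context:
context.applyLoraAdapters([{ path: './adapter.gguf', scaled: 1.0 }])

// Inspect what is attached:
const adapters = context.getLoadedLoraAdapters() // [{ path, scaled }, ...]

// Detach everything:
context.removeLoraAdapters()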
package/src/LlamaContext.h
CHANGED
@@ -19,10 +19,15 @@ private:
   Napi::Value Embedding(const Napi::CallbackInfo &info);
   Napi::Value SaveSession(const Napi::CallbackInfo &info);
   Napi::Value LoadSession(const Napi::CallbackInfo &info);
+  void ApplyLoraAdapters(const Napi::CallbackInfo &info);
+  void RemoveLoraAdapters(const Napi::CallbackInfo &info);
+  Napi::Value GetLoadedLoraAdapters(const Napi::CallbackInfo &info);
   Napi::Value Release(const Napi::CallbackInfo &info);
 
   std::string _info;
   Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
+  common_chat_templates _templates;
+  std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
 };
package/src/common.hpp
CHANGED
@@ -2,6 +2,8 @@
 
 #include "common/common.h"
 #include "common/sampling.h"
+#include "chat.hpp"
+#include "chat-template.hpp"
 #include "llama.h"
 #include <memory>
 #include <mutex>
@@ -15,11 +17,26 @@ typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
 
+static bool is_nil(const Napi::Value &value) {
+  return value.IsNull() || value.IsUndefined();
+}
+
+static std::string json_stringify(const Napi::Object &obj) {
+  Napi::Env env = obj.Env();
+  Napi::Object json = env.Global().Get("JSON").As<Napi::Object>();
+  Napi::Function stringify = json.Get("stringify").As<Napi::Function>();
+  return stringify.Call(json, { obj }).As<Napi::String>().ToString();
+}
+
+static void console_log(Napi::Env env, const std::string& message) {
+  Napi::Function consoleLog = env.Global().Get("console").As<Napi::Object>().Get("log").As<Napi::Function>();
+  consoleLog.Call({ Napi::String::New(env, message) });
+}
+
 template <typename T>
 constexpr T get_option(const Napi::Object &options, const std::string &name,
                        const T default_value) {
-  if (options.Has(name) && !options.Get(name)
-      !options.Get(name).IsNull()) {
+  if (options.Has(name) && !is_nil(options.Get(name))) {
     if constexpr (std::is_same<T, std::string>::value) {
       return options.Get(name).ToString().operator T();
     } else if constexpr (std::is_same<T, int32_t>::value ||
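The get_option rewrite folds the old two-clause check into the is_nil helper, so a JS option set to null or undefined is treated the same as an absent key. In TypeScript terms, reusing the loadModel sketch from earlier:

// n_ctx is present but undefined, so get_option falls back to its default (512):
const ctx2 = await loadModel({ model: './model.gguf', n_ctx: undefined })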
|