@fugood/llama.node 0.3.9 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.js +2 -2
- package/lib/binding.ts +46 -8
- package/lib/index.ts +3 -1
- package/package.json +8 -1
- package/src/LlamaCompletionWorker.cpp +33 -6
- package/src/LlamaCompletionWorker.h +3 -1
- package/src/LlamaContext.cpp +292 -28
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +19 -2
- package/src/llama.cpp/.github/workflows/build.yml +289 -107
- package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
- package/src/llama.cpp/.github/workflows/docker.yml +2 -1
- package/src/llama.cpp/.github/workflows/server.yml +25 -2
- package/src/llama.cpp/CMakeLists.txt +10 -19
- package/src/llama.cpp/cmake/build-info.cmake +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +32 -0
- package/src/llama.cpp/common/arg.cpp +66 -16
- package/src/llama.cpp/common/chat-template.hpp +515 -0
- package/src/llama.cpp/common/chat.cpp +966 -0
- package/src/llama.cpp/common/chat.hpp +52 -0
- package/src/llama.cpp/common/common.cpp +159 -36
- package/src/llama.cpp/common/common.h +56 -14
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
- package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
- package/src/llama.cpp/common/llguidance.cpp +270 -0
- package/src/llama.cpp/common/log.cpp +1 -10
- package/src/llama.cpp/common/log.h +10 -0
- package/src/llama.cpp/common/minja.hpp +2868 -0
- package/src/llama.cpp/common/sampling.cpp +22 -1
- package/src/llama.cpp/common/sampling.h +3 -0
- package/src/llama.cpp/docs/build.md +54 -9
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
- package/src/llama.cpp/examples/llava/clip.cpp +133 -14
- package/src/llama.cpp/examples/llava/clip.h +2 -0
- package/src/llama.cpp/examples/llava/llava.cpp +22 -8
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
- package/src/llama.cpp/examples/main/main.cpp +26 -25
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
- package/src/llama.cpp/examples/run/run.cpp +224 -69
- package/src/llama.cpp/examples/server/server.cpp +252 -81
- package/src/llama.cpp/examples/server/utils.hpp +73 -21
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
- package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +23 -13
- package/src/llama.cpp/include/llama.h +14 -1
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/src/llama-arch.cpp +7 -2
- package/src/llama.cpp/src/llama-arch.h +3 -1
- package/src/llama.cpp/src/llama-chat.cpp +11 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +86 -6
- package/src/llama.cpp/src/llama-grammar.h +22 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +76 -6
- package/src/llama.cpp/src/llama-sampling.cpp +47 -4
- package/src/llama.cpp/src/llama-vocab.cpp +10 -4
- package/src/llama.cpp/src/llama.cpp +181 -123
- package/src/llama.cpp/tests/CMakeLists.txt +4 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
- package/src/llama.cpp/tests/test-chat.cpp +607 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/src/LlamaContext.cpp
CHANGED
@@ -1,6 +1,8 @@
 #include "ggml.h"
 #include "gguf.h"
 #include "llama-impl.h"
+#include "json.hpp"
+#include "json-schema-to-grammar.h"
 #include "LlamaContext.h"
 #include "DetokenizeWorker.h"
 #include "DisposeWorker.h"
@@ -10,6 +12,8 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
+using json = nlohmann::ordered_json;
+
 // loadModelInfo(path: string): object
 Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
   Napi::Env env = info.Env();
@@ -176,6 +180,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     params.warmup = false;
   }
 
+  params.chat_template = get_option<std::string>(options, "chat_template", "");
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -255,6 +261,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   _sess = sess;
   _info = common_params_get_system_info(params);
+
+  _templates = common_chat_templates_from_model(model, params.chat_template);
 }
 
 // getSystemInfo(): string
@@ -262,17 +270,12 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
   return Napi::String::New(info.Env(), _info);
 }
 
-bool validateModelChatTemplate(const struct llama_model * model) {
-
-
-
-
-
-    const char * tmpl = llama_model_chat_template(model);
-    int32_t chat_res = llama_chat_apply_template(tmpl, chat, 1, true, nullptr, 0);
-    return chat_res > 0;
-  }
-  return res > 0;
+bool validateModelChatTemplate(const struct llama_model * model, const bool use_jinja, const char * name) {
+  const char * tmpl = llama_model_chat_template(model, name);
+  if (tmpl == nullptr) {
+    return false;
+  }
+  return common_chat_verify_template(tmpl, use_jinja);
 }
 
 // getModelInfo(): object
@@ -286,7 +289,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   for (int i = 0; i < count; i++) {
     char key[256];
     llama_model_meta_key_by_index(model, i, key, sizeof(key));
-    char val[
+    char val[4096];
     llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
 
     metadata.Set(key, val);
@@ -296,20 +299,194 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   details.Set("nEmbd", llama_model_n_embd(model));
   details.Set("nParams", llama_model_n_params(model));
   details.Set("size", llama_model_size(model));
-
+
+  Napi::Object chatTemplates = Napi::Object::New(info.Env());
+  chatTemplates.Set("llamaChat", validateModelChatTemplate(model, false, ""));
+  Napi::Object minja = Napi::Object::New(info.Env());
+  minja.Set("default", validateModelChatTemplate(model, true, ""));
+  Napi::Object defaultCaps = Napi::Object::New(info.Env());
+  defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
+  defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
+  defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
+  defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
+  defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
+  defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
+  minja.Set("defaultCaps", defaultCaps);
+  Napi::Object toolUse = Napi::Object::New(info.Env());
+  toolUse.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
+  if (_templates.template_tool_use) {
+    Napi::Object toolUseCaps = Napi::Object::New(info.Env());
+    toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
+    toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
+    toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
+    toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
+    toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
+    toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
+    toolUse.Set("toolUseCaps", toolUseCaps);
+  }
+  minja.Set("toolUse", toolUse);
+  chatTemplates.Set("minja", minja);
+  details.Set("chatTemplates", chatTemplates);
+
   details.Set("metadata", metadata);
   return details;
 }
 
-
+common_chat_params getFormattedChatWithJinja(
+  const struct llama_model * model,
+  const common_chat_templates &templates,
+  const std::string &messages,
+  const std::string &chat_template,
+  const std::string &json_schema,
+  const std::string &tools,
+  const bool &parallel_tool_calls,
+  const std::string &tool_choice
+) {
+  common_chat_inputs inputs;
+  inputs.messages = json::parse(messages);
+  auto useTools = !tools.empty();
+  if (useTools) {
+    inputs.tools = json::parse(tools);
+  }
+  inputs.parallel_tool_calls = parallel_tool_calls;
+  if (!tool_choice.empty()) {
+    inputs.tool_choice = tool_choice;
+  }
+  if (!json_schema.empty()) {
+    inputs.json_schema = json::parse(json_schema);
+  }
+  inputs.stream = true;
+
+  // If chat_template is provided, create new one and use it (probably slow)
+  if (!chat_template.empty()) {
+    auto tmp = common_chat_templates_from_model(model, chat_template);
+    const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
+    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
+      inputs.parallel_tool_calls = false;
+    }
+    return common_chat_params_init(*template_ptr, inputs);
+  } else {
+    const common_chat_template* template_ptr = useTools && templates.template_tool_use ? templates.template_tool_use.get() : templates.template_default.get();
+    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
+      inputs.parallel_tool_calls = false;
+    }
+    return common_chat_params_init(*template_ptr, inputs);
+  }
+}
+
+std::string getFormattedChat(
+  const struct llama_model * model,
+  const common_chat_templates &templates,
+  const std::string &messages,
+  const std::string &chat_template
+) {
+  auto chat_json = json::parse(messages);
+
+  // Handle regular chat without tools
+  std::vector<common_chat_msg> chat_msgs;
+  for (const auto &msg : chat_json) {
+    chat_msgs.push_back({
+      msg["role"].get<std::string>(),
+      msg["content"].get<std::string>()
+    });
+  }
+
+  // If chat_template is provided, create new one and use it (probably slow)
+  if (!chat_template.empty()) {
+    auto tmp = common_chat_templates_from_model(model, chat_template);
+    return common_chat_apply_template(
+      *tmp.template_default,
+      chat_msgs,
+      true,
+      false
+    );
+  } else {
+    return common_chat_apply_template(
+      *templates.template_default,
+      chat_msgs,
+      true,
+      false
+    );
+  }
+}
+
+// getFormattedChat(
+//   messages: [{ role: string, content: string }],
+//   chat_template: string,
+//   params: { jinja: boolean, json_schema: string, tools: string, parallel_tool_calls: boolean, tool_choice: string }
+// ): object | string
 Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
   if (info.Length() < 1 || !info[0].IsArray()) {
     Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
   }
-  auto messages = info[0].As<Napi::Array>();
-
-
+  auto messages = json_stringify(info[0].As<Napi::Array>());
+  printf("messages: %s\n", messages.c_str());
+  auto chat_template = info[1].IsString() ? info[1].ToString().Utf8Value() : "";
+
+  auto has_params = info.Length() >= 2;
+  auto params = has_params ? info[2].As<Napi::Object>() : Napi::Object::New(env);
+
+  if (get_option<bool>(params, "jinja", false)) {
+    std::string json_schema_str = "";
+    if (!is_nil(params.Get("response_format"))) {
+      auto response_format = params.Get("response_format").As<Napi::Object>();
+      auto response_format_type = get_option<std::string>(response_format, "type", "text");
+      if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
+        auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
+        json_schema_str = json_schema.Has("schema") ?
+          json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
+          "{}";
+      } else if (response_format_type == "json_object") {
+        json_schema_str = response_format.Has("schema") ?
+          json_stringify(response_format.Get("schema").As<Napi::Object>()) :
+          "{}";
+      }
+    }
+    auto tools_str = params.Has("tools") ?
+      json_stringify(params.Get("tools").As<Napi::Array>()) :
+      "";
+    auto parallel_tool_calls = get_option<bool>(params, "parallel_tool_calls", false);
+    auto tool_choice = get_option<std::string>(params, "tool_choice", "");
+
+    auto chatParams = getFormattedChatWithJinja(_sess->model(), _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
+
+    Napi::Object result = Napi::Object::New(env);
+    result.Set("prompt", chatParams.prompt.get<std::string>());
+    // chat_format: int
+    result.Set("chat_format", static_cast<int>(chatParams.format));
+    // grammar: string
+    result.Set("grammar", chatParams.grammar);
+    // grammar_lazy: boolean
+    result.Set("grammea_lazy", chatParams.grammar_lazy);
+    // grammar_triggers: [{ word: string, at_start: boolean }]
+    Napi::Array grammar_triggers = Napi::Array::New(env);
+    for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
+      const auto & trigger = chatParams.grammar_triggers[i];
+      Napi::Object triggerObj = Napi::Object::New(env);
+      triggerObj.Set("word", Napi::String::New(env, trigger.word.c_str()));
+      triggerObj.Set("at_start", Napi::Boolean::New(env, trigger.at_start));
+      grammar_triggers.Set(i, triggerObj);
+    }
+    result.Set("grammar_triggers", grammar_triggers);
+    // preserved_tokens: string[]
+    Napi::Array preserved_tokens = Napi::Array::New(env);
+    for (size_t i = 0; i < chatParams.preserved_tokens.size(); i++) {
+      preserved_tokens.Set(i, Napi::String::New(env, chatParams.preserved_tokens[i].c_str()));
+    }
+    result.Set("preserved_tokens", preserved_tokens);
+    // additional_stops: string[]
+    Napi::Array additional_stops = Napi::Array::New(env);
+    for (size_t i = 0; i < chatParams.additional_stops.size(); i++) {
+      additional_stops.Set(i, Napi::String::New(env, chatParams.additional_stops[i].c_str()));
+    }
+    result.Set("additional_stops", additional_stops);
+
+    return result;
+  } else {
+    auto formatted = getFormattedChat(_sess->model(), _templates, messages, chat_template);
+    return Napi::String::New(env, formatted);
+  }
 }
 
 // completion(options: LlamaCompletionOptions, onToken?: (token: string) =>
@@ -332,11 +509,101 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
   auto options = info[0].As<Napi::Object>();
 
+  std::vector<std::string> stop_words;
+  if (options.Has("stop") && options.Get("stop").IsArray()) {
+    auto stop_words_array = options.Get("stop").As<Napi::Array>();
+    for (size_t i = 0; i < stop_words_array.Length(); i++) {
+      stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
+    }
+  }
+
+  int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
+
   common_params params = _sess->params();
+  auto grammar_from_params = get_option<std::string>(options, "grammar", "");
+  auto has_grammar_set = !grammar_from_params.empty();
+  if (has_grammar_set) {
+    params.sampling.grammar = grammar_from_params;
+  }
+
+  std::string json_schema_str = "";
+  if (options.Has("response_format")) {
+    auto response_format = options.Get("response_format").As<Napi::Object>();
+    auto response_format_type = get_option<std::string>(response_format, "type", "text");
+    if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
+      auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
+      json_schema_str = json_schema.Has("schema") ?
+        json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
+        "{}";
+    } else if (response_format_type == "json_object") {
+      json_schema_str = response_format.Has("schema") ?
+        json_stringify(response_format.Get("schema").As<Napi::Object>()) :
+        "{}";
+    }
+  }
+
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
-    auto
-
+    auto chat_template = get_option<std::string>(options, "chat_template", "");
+    auto jinja = get_option<bool>(options, "jinja", false);
+    if (jinja) {
+      auto tools_str = options.Has("tools") ?
+        json_stringify(options.Get("tools").As<Napi::Array>()) :
+        "";
+      auto parallel_tool_calls = get_option<bool>(options, "parallel_tool_calls", false);
+      auto tool_choice = get_option<std::string>(options, "tool_choice", "none");
+
+      auto chatParams = getFormattedChatWithJinja(
+        _sess->model(),
+        _templates,
+        json_stringify(messages),
+        chat_template,
+        json_schema_str,
+        tools_str,
+        parallel_tool_calls,
+        tool_choice
+      );
+
+      params.prompt = chatParams.prompt.get<std::string>();
+
+      chat_format = chatParams.format;
+
+      if (!has_grammar_set) {
+        // grammar param always wins jinja template & json_schema
+        params.sampling.grammar = chatParams.grammar;
+        params.sampling.grammar_lazy = chatParams.grammar_lazy;
+
+        for (const auto & trigger : chatParams.grammar_triggers) {
+          auto ids = common_tokenize(_sess->context(), trigger.word, /* add_special= */ false, /* parse_special= */ true);
+          if (ids.size() == 1) {
+            params.sampling.grammar_trigger_tokens.push_back(ids[0]);
+            params.sampling.preserved_tokens.insert(ids[0]);
+            continue;
+          }
+          params.sampling.grammar_trigger_words.push_back(trigger);
+        }
+        has_grammar_set = true;
+      }
+
+      for (const auto & token : chatParams.preserved_tokens) {
+        auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+        if (ids.size() == 1) {
+          params.sampling.preserved_tokens.insert(ids[0]);
+        }
+      }
+
+      for (const auto & stop : chatParams.additional_stops) {
+        stop_words.push_back(stop);
+      }
+    } else {
+      auto formatted = getFormattedChat(
+        _sess->model(),
+        _templates,
+        json_stringify(messages),
+        chat_template
+      );
+      params.prompt = formatted;
+    }
   } else {
     params.prompt = get_option<std::string>(options, "prompt", "");
   }
@@ -344,6 +611,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Prompt is required")
         .ThrowAsJavaScriptException();
   }
+
+  if (!has_grammar_set && !json_schema_str.empty()) {
+    params.sampling.grammar = json_schema_to_grammar(json::parse(json_schema_str));
+  }
+
   params.n_predict = get_option<int32_t>(options, "n_predict", -1);
   params.sampling.temp = get_option<float>(options, "temperature", 0.80f);
   params.sampling.top_k = get_option<int32_t>(options, "top_k", 40);
@@ -370,16 +642,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
   params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
   params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
-  params.sampling.grammar = get_option<std::string>(options, "grammar", "");
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
   params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
-  std::vector<std::string> stop_words;
-  if (options.Has("stop") && options.Get("stop").IsArray()) {
-    auto stop_words_array = options.Get("stop").As<Napi::Array>();
-    for (size_t i = 0; i < stop_words_array.Length(); i++) {
-      stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
-    }
-  }
 
   Napi::Function callback;
   if (info.Length() >= 2) {
@@ -387,7 +651,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   auto *worker =
-      new LlamaCompletionWorker(info, _sess, callback, params, stop_words);
+      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format);
   worker->Queue();
   _wip = worker;
   worker->onComplete([this]() { _wip = nullptr; });
package/src/LlamaContext.h
CHANGED
package/src/common.hpp
CHANGED
@@ -2,6 +2,8 @@
 
 #include "common/common.h"
 #include "common/sampling.h"
+#include "chat.hpp"
+#include "chat-template.hpp"
 #include "llama.h"
 #include <memory>
 #include <mutex>
@@ -15,11 +17,26 @@ typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
 
+static bool is_nil(const Napi::Value &value) {
+  return value.IsNull() || value.IsUndefined();
+}
+
+static std::string json_stringify(const Napi::Object &obj) {
+  Napi::Env env = obj.Env();
+  Napi::Object json = env.Global().Get("JSON").As<Napi::Object>();
+  Napi::Function stringify = json.Get("stringify").As<Napi::Function>();
+  return stringify.Call(json, { obj }).As<Napi::String>().ToString();
+}
+
+static void console_log(Napi::Env env, const std::string& message) {
+  Napi::Function consoleLog = env.Global().Get("console").As<Napi::Object>().Get("log").As<Napi::Function>();
+  consoleLog.Call({ Napi::String::New(env, message) });
+}
+
 template <typename T>
 constexpr T get_option(const Napi::Object &options, const std::string &name,
                        const T default_value) {
-  if (options.Has(name) && !options.Get(name).IsUndefined() &&
-      !options.Get(name).IsNull()) {
+  if (options.Has(name) && !is_nil(options.Get(name))) {
    if constexpr (std::is_same<T, std::string>::value) {
      return options.Get(name).ToString().operator T();
    } else if constexpr (std::is_same<T, int32_t>::value ||
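
One behavioural consequence of the new is_nil helper above: every option read through get_option now treats an explicit null or undefined the same as an omitted key, so such values fall back to the native defaults set in LlamaContext.cpp (temperature 0.80, top_k 40, n_predict -1, and so on), while json_stringify is what serialises messages, tools and schemas before they reach the jinja path. A hypothetical TypeScript illustration of the caller-side effect; the completion call shape and the token callback follow the comments in LlamaContext.cpp, but the wrapper names are assumptions:

// Hypothetical sketch: null / undefined options take the native get_option() default path.
import { loadModel } from '@fugood/llama.node'

async function run() {
  const ctx = await loadModel({ model: 'model.gguf' }) // placeholder model path

  const result = await ctx.completion(
    {
      messages: [{ role: 'user', content: 'Hi' }],
      temperature: undefined, // treated as unset -> native default 0.80
      top_k: null,            // treated as unset -> native default 40
      stop: ['</s>']          // user stops; template additional_stops are appended on the jinja path
    },
    (token) => process.stdout.write(token) // onToken callback, per the native comment
  )
  console.log(result)
}

run()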
|