@fugood/llama.node 0.3.9 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.js +2 -2
- package/lib/binding.ts +47 -8
- package/lib/index.js +21 -1
- package/lib/index.ts +31 -1
- package/package.json +12 -3
- package/src/LlamaCompletionWorker.cpp +33 -6
- package/src/LlamaCompletionWorker.h +3 -1
- package/src/LlamaContext.cpp +336 -28
- package/src/LlamaContext.h +2 -0
- package/src/common.hpp +19 -2
- package/src/llama.cpp/.github/workflows/build.yml +289 -107
- package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
- package/src/llama.cpp/.github/workflows/docker.yml +2 -1
- package/src/llama.cpp/.github/workflows/server.yml +25 -2
- package/src/llama.cpp/CMakeLists.txt +10 -19
- package/src/llama.cpp/cmake/build-info.cmake +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +32 -0
- package/src/llama.cpp/common/arg.cpp +66 -16
- package/src/llama.cpp/common/chat-template.hpp +515 -0
- package/src/llama.cpp/common/chat.cpp +966 -0
- package/src/llama.cpp/common/chat.hpp +52 -0
- package/src/llama.cpp/common/common.cpp +159 -36
- package/src/llama.cpp/common/common.h +56 -14
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
- package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
- package/src/llama.cpp/common/llguidance.cpp +270 -0
- package/src/llama.cpp/common/log.cpp +1 -10
- package/src/llama.cpp/common/log.h +10 -0
- package/src/llama.cpp/common/minja.hpp +2868 -0
- package/src/llama.cpp/common/sampling.cpp +22 -1
- package/src/llama.cpp/common/sampling.h +3 -0
- package/src/llama.cpp/docs/build.md +54 -9
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
- package/src/llama.cpp/examples/llava/clip.cpp +133 -14
- package/src/llama.cpp/examples/llava/clip.h +2 -0
- package/src/llama.cpp/examples/llava/llava.cpp +22 -8
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
- package/src/llama.cpp/examples/main/main.cpp +26 -25
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
- package/src/llama.cpp/examples/run/run.cpp +224 -69
- package/src/llama.cpp/examples/server/server.cpp +252 -81
- package/src/llama.cpp/examples/server/utils.hpp +73 -21
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
- package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +23 -13
- package/src/llama.cpp/include/llama.h +14 -1
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/src/llama-arch.cpp +7 -2
- package/src/llama.cpp/src/llama-arch.h +3 -1
- package/src/llama.cpp/src/llama-chat.cpp +11 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +86 -6
- package/src/llama.cpp/src/llama-grammar.h +22 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +76 -6
- package/src/llama.cpp/src/llama-sampling.cpp +47 -4
- package/src/llama.cpp/src/llama-vocab.cpp +10 -4
- package/src/llama.cpp/src/llama.cpp +181 -123
- package/src/llama.cpp/tests/CMakeLists.txt +4 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
- package/src/llama.cpp/tests/test-chat.cpp +607 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/src/LlamaContext.cpp
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
#include "ggml.h"
|
|
2
2
|
#include "gguf.h"
|
|
3
3
|
#include "llama-impl.h"
|
|
4
|
+
#include "json.hpp"
|
|
5
|
+
#include "json-schema-to-grammar.h"
|
|
4
6
|
#include "LlamaContext.h"
|
|
5
7
|
#include "DetokenizeWorker.h"
|
|
6
8
|
#include "DisposeWorker.h"
|
|
@@ -10,6 +12,8 @@
|
|
|
10
12
|
#include "SaveSessionWorker.h"
|
|
11
13
|
#include "TokenizeWorker.h"
|
|
12
14
|
|
|
15
|
+
using json = nlohmann::ordered_json;
|
|
16
|
+
|
|
13
17
|
// loadModelInfo(path: string): object
|
|
14
18
|
Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
|
|
15
19
|
Napi::Env env = info.Env();
|
|
@@ -116,6 +120,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
|
|
|
116
120
|
"release", static_cast<napi_property_attributes>(napi_enumerable)),
|
|
117
121
|
StaticMethod<&LlamaContext::ModelInfo>(
|
|
118
122
|
"loadModelInfo",
|
|
123
|
+
static_cast<napi_property_attributes>(napi_enumerable)),
|
|
124
|
+
StaticMethod<&LlamaContext::ToggleNativeLog>(
|
|
125
|
+
"toggleNativeLog",
|
|
119
126
|
static_cast<napi_property_attributes>(napi_enumerable))});
|
|
120
127
|
Napi::FunctionReference *constructor = new Napi::FunctionReference();
|
|
121
128
|
*constructor = Napi::Persistent(func);
|
|
@@ -176,6 +183,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
|
|
|
176
183
|
params.warmup = false;
|
|
177
184
|
}
|
|
178
185
|
|
|
186
|
+
params.chat_template = get_option<std::string>(options, "chat_template", "");
|
|
187
|
+
|
|
179
188
|
params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
|
|
180
189
|
params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
|
|
181
190
|
params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
|
|
@@ -255,6 +264,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
|
|
|
255
264
|
|
|
256
265
|
_sess = sess;
|
|
257
266
|
_info = common_params_get_system_info(params);
|
|
267
|
+
|
|
268
|
+
_templates = common_chat_templates_from_model(model, params.chat_template);
|
|
258
269
|
}
|
|
259
270
|
|
|
260
271
|
// getSystemInfo(): string
|
|
@@ -262,17 +273,52 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
|
|
|
262
273
|
return Napi::String::New(info.Env(), _info);
|
|
263
274
|
}
|
|
264
275
|
|
|
265
|
-
bool validateModelChatTemplate(const struct llama_model * model) {
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
+
bool validateModelChatTemplate(const struct llama_model * model, const bool use_jinja, const char * name) {
|
|
277
|
+
const char * tmpl = llama_model_chat_template(model, name);
|
|
278
|
+
if (tmpl == nullptr) {
|
|
279
|
+
return false;
|
|
280
|
+
}
|
|
281
|
+
return common_chat_verify_template(tmpl, use_jinja);
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
static Napi::FunctionReference _log_callback;
|
|
285
|
+
|
|
286
|
+
// toggleNativeLog(enable: boolean, callback: (log: string) => void): void
|
|
287
|
+
void LlamaContext::ToggleNativeLog(const Napi::CallbackInfo &info) {
|
|
288
|
+
bool enable = info[0].ToBoolean().Value();
|
|
289
|
+
if (enable) {
|
|
290
|
+
_log_callback.Reset(info[1].As<Napi::Function>());
|
|
291
|
+
|
|
292
|
+
llama_log_set([](ggml_log_level level, const char * text, void * user_data) {
|
|
293
|
+
llama_log_callback_default(level, text, user_data);
|
|
294
|
+
|
|
295
|
+
std::string level_str = "";
|
|
296
|
+
if (level == GGML_LOG_LEVEL_ERROR) {
|
|
297
|
+
level_str = "error";
|
|
298
|
+
} else if (level == GGML_LOG_LEVEL_INFO) {
|
|
299
|
+
level_str = "info";
|
|
300
|
+
} else if (level == GGML_LOG_LEVEL_WARN) {
|
|
301
|
+
level_str = "warn";
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
if (_log_callback.IsEmpty()) {
|
|
305
|
+
return;
|
|
306
|
+
}
|
|
307
|
+
try {
|
|
308
|
+
Napi::Env env = _log_callback.Env();
|
|
309
|
+
Napi::HandleScope scope(env);
|
|
310
|
+
_log_callback.Call({
|
|
311
|
+
Napi::String::New(env, level_str),
|
|
312
|
+
Napi::String::New(env, text)
|
|
313
|
+
});
|
|
314
|
+
} catch (const std::exception &e) {
|
|
315
|
+
// printf("Error calling log callback: %s\n", e.what());
|
|
316
|
+
}
|
|
317
|
+
}, nullptr);
|
|
318
|
+
} else {
|
|
319
|
+
_log_callback.Reset();
|
|
320
|
+
llama_log_set(llama_log_callback_default, nullptr);
|
|
321
|
+
}
|
|
276
322
|
}
|
|
277
323
|
|
|
278
324
|
// getModelInfo(): object
|
|
@@ -286,7 +332,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
|
|
|
286
332
|
for (int i = 0; i < count; i++) {
|
|
287
333
|
char key[256];
|
|
288
334
|
llama_model_meta_key_by_index(model, i, key, sizeof(key));
|
|
289
|
-
char val[
|
|
335
|
+
char val[4096];
|
|
290
336
|
llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
|
|
291
337
|
|
|
292
338
|
metadata.Set(key, val);
|
|
@@ -296,20 +342,195 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
|
|
|
296
342
|
details.Set("nEmbd", llama_model_n_embd(model));
|
|
297
343
|
details.Set("nParams", llama_model_n_params(model));
|
|
298
344
|
details.Set("size", llama_model_size(model));
|
|
299
|
-
|
|
345
|
+
|
|
346
|
+
Napi::Object chatTemplates = Napi::Object::New(info.Env());
|
|
347
|
+
chatTemplates.Set("llamaChat", validateModelChatTemplate(model, false, ""));
|
|
348
|
+
Napi::Object minja = Napi::Object::New(info.Env());
|
|
349
|
+
minja.Set("default", validateModelChatTemplate(model, true, ""));
|
|
350
|
+
Napi::Object defaultCaps = Napi::Object::New(info.Env());
|
|
351
|
+
defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
|
|
352
|
+
defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
|
|
353
|
+
defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
|
|
354
|
+
defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
|
|
355
|
+
defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
|
|
356
|
+
defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
|
|
357
|
+
minja.Set("defaultCaps", defaultCaps);
|
|
358
|
+
minja.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
|
|
359
|
+
if (_templates.template_tool_use) {
|
|
360
|
+
Napi::Object toolUseCaps = Napi::Object::New(info.Env());
|
|
361
|
+
toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
|
|
362
|
+
toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
|
|
363
|
+
toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
|
|
364
|
+
toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
|
|
365
|
+
toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
|
|
366
|
+
toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
|
|
367
|
+
minja.Set("toolUseCaps", toolUseCaps);
|
|
368
|
+
}
|
|
369
|
+
chatTemplates.Set("minja", minja);
|
|
370
|
+
details.Set("chatTemplates", chatTemplates);
|
|
371
|
+
|
|
300
372
|
details.Set("metadata", metadata);
|
|
373
|
+
|
|
374
|
+
// Deprecated: use chatTemplates.llamaChat instead
|
|
375
|
+
details.Set("isChatTemplateSupported", validateModelChatTemplate(_sess->model(), false, ""));
|
|
301
376
|
return details;
|
|
302
377
|
}
|
|
303
378
|
|
|
304
|
-
|
|
379
|
+
// Builds chat parameters (prompt, grammar, ...) through the Jinja template path.
//
// model:               model used to build a template set when `chat_template`
//                      is given.
// templates:           template set used when `chat_template` is empty.
// messages:            JSON text of the chat messages.
// chat_template:       optional template override; when non-empty a fresh
//                      template set is built for this call (probably slow).
// json_schema:         optional JSON text of a schema; ignored when empty.
// tools:               optional JSON text of the tool list; ignored when empty.
// parallel_tool_calls: requested parallel tool calling; cleared when the
//                      selected template does not report support for it.
// tool_choice:         optional tool choice; ignored when empty.
//
// Returns the result of common_chat_params_init for the selected template.
// json::parse failures on any of the JSON inputs propagate to the caller.
common_chat_params getFormattedChatWithJinja(
  const struct llama_model * model,
  const common_chat_templates &templates,
  const std::string &messages,
  const std::string &chat_template,
  const std::string &json_schema,
  const std::string &tools,
  const bool &parallel_tool_calls,
  const std::string &tool_choice
) {
  common_chat_inputs inputs;
  inputs.messages = json::parse(messages);
  const bool useTools = !tools.empty();
  if (useTools) {
    inputs.tools = json::parse(tools);
  }
  inputs.parallel_tool_calls = parallel_tool_calls;
  if (!tool_choice.empty()) {
    inputs.tool_choice = tool_choice;
  }
  if (!json_schema.empty()) {
    inputs.json_schema = json::parse(json_schema);
  }
  inputs.stream = true;

  // Shared tail for both template sources: prefer the tool-use template when
  // tools are supplied and the set has one, otherwise use the default.
  const auto render = [&inputs, useTools](const common_chat_templates &source) {
    const common_chat_template * template_ptr =
      useTools && source.template_tool_use ? source.template_tool_use.get()
                                           : source.template_default.get();
    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
      inputs.parallel_tool_calls = false;
    }
    return common_chat_params_init(*template_ptr, inputs);
  };

  // If chat_template is provided, create new one and use it (probably slow)
  if (!chat_template.empty()) {
    const auto tmp = common_chat_templates_from_model(model, chat_template);
    return render(tmp);
  }
  return render(templates);
}
|
|
420
|
+
|
|
421
|
+
std::string getFormattedChat(
|
|
422
|
+
const struct llama_model * model,
|
|
423
|
+
const common_chat_templates &templates,
|
|
424
|
+
const std::string &messages,
|
|
425
|
+
const std::string &chat_template
|
|
426
|
+
) {
|
|
427
|
+
auto chat_json = json::parse(messages);
|
|
428
|
+
|
|
429
|
+
// Handle regular chat without tools
|
|
430
|
+
std::vector<common_chat_msg> chat_msgs;
|
|
431
|
+
for (const auto &msg : chat_json) {
|
|
432
|
+
chat_msgs.push_back({
|
|
433
|
+
msg["role"].get<std::string>(),
|
|
434
|
+
msg["content"].get<std::string>()
|
|
435
|
+
});
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
// If chat_template is provided, create new one and use it (probably slow)
|
|
439
|
+
if (!chat_template.empty()) {
|
|
440
|
+
auto tmp = common_chat_templates_from_model(model, chat_template);
|
|
441
|
+
return common_chat_apply_template(
|
|
442
|
+
*tmp.template_default,
|
|
443
|
+
chat_msgs,
|
|
444
|
+
true,
|
|
445
|
+
false
|
|
446
|
+
);
|
|
447
|
+
} else {
|
|
448
|
+
return common_chat_apply_template(
|
|
449
|
+
*templates.template_default,
|
|
450
|
+
chat_msgs,
|
|
451
|
+
true,
|
|
452
|
+
false
|
|
453
|
+
);
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
// getFormattedChat(
|
|
458
|
+
// messages: [{ role: string, content: string }],
|
|
459
|
+
// chat_template: string,
|
|
460
|
+
// params: { jinja: boolean, json_schema: string, tools: string, parallel_tool_calls: boolean, tool_choice: string }
|
|
461
|
+
// ): object | string
|
|
305
462
|
// Formats chat messages into a prompt.
//
// info[0]: array of messages (required; a TypeError is raised otherwise).
// info[1]: optional chat template string; anything else means "no override".
// info[2]: optional params object (jinja, response_format, tools,
//          parallel_tool_calls, tool_choice).
//
// Returns a string when params.jinja is not set. With params.jinja it returns
// an object with prompt, chat_format, grammar, grammar_lazy, grammar_triggers,
// preserved_tokens and additional_stops.
Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1 || !info[0].IsArray()) {
    Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
    // ThrowAsJavaScriptException only marks the exception as pending, so
    // execution must be stopped explicitly.
    return env.Undefined();
  }
  auto messages = json_stringify(info[0].As<Napi::Array>());
  auto chat_template = info[1].IsString() ? info[1].ToString().Utf8Value() : "";

  // params is the third argument; fall back to an empty object when it is
  // missing or not an object.
  const bool has_params = info.Length() >= 3 && info[2].IsObject();
  auto params = has_params ? info[2].As<Napi::Object>() : Napi::Object::New(env);

  if (!get_option<bool>(params, "jinja", false)) {
    auto formatted = getFormattedChat(_sess->model(), _templates, messages, chat_template);
    return Napi::String::New(env, formatted);
  }

  // Derive the JSON schema text (if any) from response_format.
  std::string json_schema_str = "";
  if (!is_nil(params.Get("response_format"))) {
    auto response_format = params.Get("response_format").As<Napi::Object>();
    auto response_format_type = get_option<std::string>(response_format, "type", "text");
    if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
      auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
      json_schema_str = json_schema.Has("schema") ?
        json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
        "{}";
    } else if (response_format_type == "json_object") {
      json_schema_str = response_format.Has("schema") ?
        json_stringify(response_format.Get("schema").As<Napi::Object>()) :
        "{}";
    }
  }

  // Treat tools: null / undefined like an absent key instead of serialising it.
  std::string tools_str = "";
  if (!is_nil(params.Get("tools"))) {
    tools_str = json_stringify(params.Get("tools").As<Napi::Array>());
  }
  auto parallel_tool_calls = get_option<bool>(params, "parallel_tool_calls", false);
  auto tool_choice = get_option<std::string>(params, "tool_choice", "");

  auto chatParams = getFormattedChatWithJinja(_sess->model(), _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);

  Napi::Object result = Napi::Object::New(env);
  result.Set("prompt", chatParams.prompt.get<std::string>());
  // chat_format: int
  result.Set("chat_format", static_cast<int>(chatParams.format));
  // grammar: string
  result.Set("grammar", chatParams.grammar);
  // grammar_lazy: boolean
  result.Set("grammar_lazy", chatParams.grammar_lazy);
  // Deprecated alias: earlier builds exposed this value under a misspelled
  // key; kept so existing callers keep working.
  result.Set("grammea_lazy", chatParams.grammar_lazy);
  // grammar_triggers: [{ word: string, at_start: boolean }]
  Napi::Array grammar_triggers = Napi::Array::New(env);
  for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
    const auto & trigger = chatParams.grammar_triggers[i];
    Napi::Object triggerObj = Napi::Object::New(env);
    triggerObj.Set("word", Napi::String::New(env, trigger.word.c_str()));
    triggerObj.Set("at_start", Napi::Boolean::New(env, trigger.at_start));
    grammar_triggers.Set(i, triggerObj);
  }
  result.Set("grammar_triggers", grammar_triggers);
  // preserved_tokens: string[]
  Napi::Array preserved_tokens = Napi::Array::New(env);
  for (size_t i = 0; i < chatParams.preserved_tokens.size(); i++) {
    preserved_tokens.Set(i, Napi::String::New(env, chatParams.preserved_tokens[i].c_str()));
  }
  result.Set("preserved_tokens", preserved_tokens);
  // additional_stops: string[]
  Napi::Array additional_stops = Napi::Array::New(env);
  for (size_t i = 0; i < chatParams.additional_stops.size(); i++) {
    additional_stops.Set(i, Napi::String::New(env, chatParams.additional_stops[i].c_str()));
  }
  result.Set("additional_stops", additional_stops);

  return result;
}
|
|
314
535
|
|
|
315
536
|
// completion(options: LlamaCompletionOptions, onToken?: (token: string) =>
|
|
@@ -332,11 +553,101 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
|
|
|
332
553
|
}
|
|
333
554
|
auto options = info[0].As<Napi::Object>();
|
|
334
555
|
|
|
556
|
+
std::vector<std::string> stop_words;
|
|
557
|
+
if (options.Has("stop") && options.Get("stop").IsArray()) {
|
|
558
|
+
auto stop_words_array = options.Get("stop").As<Napi::Array>();
|
|
559
|
+
for (size_t i = 0; i < stop_words_array.Length(); i++) {
|
|
560
|
+
stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
|
|
565
|
+
|
|
335
566
|
common_params params = _sess->params();
|
|
567
|
+
auto grammar_from_params = get_option<std::string>(options, "grammar", "");
|
|
568
|
+
auto has_grammar_set = !grammar_from_params.empty();
|
|
569
|
+
if (has_grammar_set) {
|
|
570
|
+
params.sampling.grammar = grammar_from_params;
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
std::string json_schema_str = "";
|
|
574
|
+
if (options.Has("response_format")) {
|
|
575
|
+
auto response_format = options.Get("response_format").As<Napi::Object>();
|
|
576
|
+
auto response_format_type = get_option<std::string>(response_format, "type", "text");
|
|
577
|
+
if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
|
|
578
|
+
auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
|
|
579
|
+
json_schema_str = json_schema.Has("schema") ?
|
|
580
|
+
json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
|
|
581
|
+
"{}";
|
|
582
|
+
} else if (response_format_type == "json_object") {
|
|
583
|
+
json_schema_str = response_format.Has("schema") ?
|
|
584
|
+
json_stringify(response_format.Get("schema").As<Napi::Object>()) :
|
|
585
|
+
"{}";
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
|
|
336
589
|
if (options.Has("messages") && options.Get("messages").IsArray()) {
|
|
337
590
|
auto messages = options.Get("messages").As<Napi::Array>();
|
|
338
|
-
auto
|
|
339
|
-
|
|
591
|
+
auto chat_template = get_option<std::string>(options, "chat_template", "");
|
|
592
|
+
auto jinja = get_option<bool>(options, "jinja", false);
|
|
593
|
+
if (jinja) {
|
|
594
|
+
auto tools_str = options.Has("tools") ?
|
|
595
|
+
json_stringify(options.Get("tools").As<Napi::Array>()) :
|
|
596
|
+
"";
|
|
597
|
+
auto parallel_tool_calls = get_option<bool>(options, "parallel_tool_calls", false);
|
|
598
|
+
auto tool_choice = get_option<std::string>(options, "tool_choice", "none");
|
|
599
|
+
|
|
600
|
+
auto chatParams = getFormattedChatWithJinja(
|
|
601
|
+
_sess->model(),
|
|
602
|
+
_templates,
|
|
603
|
+
json_stringify(messages),
|
|
604
|
+
chat_template,
|
|
605
|
+
json_schema_str,
|
|
606
|
+
tools_str,
|
|
607
|
+
parallel_tool_calls,
|
|
608
|
+
tool_choice
|
|
609
|
+
);
|
|
610
|
+
|
|
611
|
+
params.prompt = chatParams.prompt.get<std::string>();
|
|
612
|
+
|
|
613
|
+
chat_format = chatParams.format;
|
|
614
|
+
|
|
615
|
+
if (!has_grammar_set) {
|
|
616
|
+
// an explicit grammar param always takes precedence over the jinja template & json_schema
|
|
617
|
+
params.sampling.grammar = chatParams.grammar;
|
|
618
|
+
params.sampling.grammar_lazy = chatParams.grammar_lazy;
|
|
619
|
+
|
|
620
|
+
for (const auto & trigger : chatParams.grammar_triggers) {
|
|
621
|
+
auto ids = common_tokenize(_sess->context(), trigger.word, /* add_special= */ false, /* parse_special= */ true);
|
|
622
|
+
if (ids.size() == 1) {
|
|
623
|
+
params.sampling.grammar_trigger_tokens.push_back(ids[0]);
|
|
624
|
+
params.sampling.preserved_tokens.insert(ids[0]);
|
|
625
|
+
continue;
|
|
626
|
+
}
|
|
627
|
+
params.sampling.grammar_trigger_words.push_back(trigger);
|
|
628
|
+
}
|
|
629
|
+
has_grammar_set = true;
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
for (const auto & token : chatParams.preserved_tokens) {
|
|
633
|
+
auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
|
|
634
|
+
if (ids.size() == 1) {
|
|
635
|
+
params.sampling.preserved_tokens.insert(ids[0]);
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
for (const auto & stop : chatParams.additional_stops) {
|
|
640
|
+
stop_words.push_back(stop);
|
|
641
|
+
}
|
|
642
|
+
} else {
|
|
643
|
+
auto formatted = getFormattedChat(
|
|
644
|
+
_sess->model(),
|
|
645
|
+
_templates,
|
|
646
|
+
json_stringify(messages),
|
|
647
|
+
chat_template
|
|
648
|
+
);
|
|
649
|
+
params.prompt = formatted;
|
|
650
|
+
}
|
|
340
651
|
} else {
|
|
341
652
|
params.prompt = get_option<std::string>(options, "prompt", "");
|
|
342
653
|
}
|
|
@@ -344,6 +655,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
|
|
|
344
655
|
Napi::TypeError::New(env, "Prompt is required")
|
|
345
656
|
.ThrowAsJavaScriptException();
|
|
346
657
|
}
|
|
658
|
+
|
|
659
|
+
if (!has_grammar_set && !json_schema_str.empty()) {
|
|
660
|
+
params.sampling.grammar = json_schema_to_grammar(json::parse(json_schema_str));
|
|
661
|
+
}
|
|
662
|
+
|
|
347
663
|
params.n_predict = get_option<int32_t>(options, "n_predict", -1);
|
|
348
664
|
params.sampling.temp = get_option<float>(options, "temperature", 0.80f);
|
|
349
665
|
params.sampling.top_k = get_option<int32_t>(options, "top_k", 40);
|
|
@@ -370,16 +686,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
|
|
|
370
686
|
params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
|
|
371
687
|
params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
|
|
372
688
|
params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
|
|
373
|
-
params.sampling.grammar = get_option<std::string>(options, "grammar", "");
|
|
374
689
|
params.n_keep = get_option<int32_t>(options, "n_keep", 0);
|
|
375
690
|
params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
|
|
376
|
-
std::vector<std::string> stop_words;
|
|
377
|
-
if (options.Has("stop") && options.Get("stop").IsArray()) {
|
|
378
|
-
auto stop_words_array = options.Get("stop").As<Napi::Array>();
|
|
379
|
-
for (size_t i = 0; i < stop_words_array.Length(); i++) {
|
|
380
|
-
stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
691
|
|
|
384
692
|
Napi::Function callback;
|
|
385
693
|
if (info.Length() >= 2) {
|
|
@@ -387,7 +695,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
|
|
|
387
695
|
}
|
|
388
696
|
|
|
389
697
|
auto *worker =
|
|
390
|
-
new LlamaCompletionWorker(info, _sess, callback, params, stop_words);
|
|
698
|
+
new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format);
|
|
391
699
|
worker->Queue();
|
|
392
700
|
_wip = worker;
|
|
393
701
|
worker->onComplete([this]() { _wip = nullptr; });
|
package/src/LlamaContext.h
CHANGED
|
@@ -5,6 +5,7 @@ class LlamaCompletionWorker;
|
|
|
5
5
|
class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
|
|
6
6
|
public:
|
|
7
7
|
LlamaContext(const Napi::CallbackInfo &info);
|
|
8
|
+
static void ToggleNativeLog(const Napi::CallbackInfo &info);
|
|
8
9
|
static Napi::Value ModelInfo(const Napi::CallbackInfo& info);
|
|
9
10
|
static void Init(Napi::Env env, Napi::Object &exports);
|
|
10
11
|
|
|
@@ -27,6 +28,7 @@ private:
|
|
|
27
28
|
std::string _info;
|
|
28
29
|
Napi::Object _meta;
|
|
29
30
|
LlamaSessionPtr _sess = nullptr;
|
|
31
|
+
common_chat_templates _templates;
|
|
30
32
|
std::vector<common_adapter_lora_info> _lora;
|
|
31
33
|
LlamaCompletionWorker *_wip = nullptr;
|
|
32
34
|
};
|
package/src/common.hpp
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
#include "common/common.h"
|
|
4
4
|
#include "common/sampling.h"
|
|
5
|
+
#include "chat.hpp"
|
|
6
|
+
#include "chat-template.hpp"
|
|
5
7
|
#include "llama.h"
|
|
6
8
|
#include <memory>
|
|
7
9
|
#include <mutex>
|
|
@@ -15,11 +17,26 @@ typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
|
|
|
15
17
|
LlamaCppSampling;
|
|
16
18
|
typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
|
|
17
19
|
|
|
20
|
+
// True when the JS value is either `undefined` or `null`.
static bool is_nil(const Napi::Value &value) {
  if (value.IsUndefined()) {
    return true;
  }
  return value.IsNull();
}
|
|
23
|
+
|
|
24
|
+
// Serialises a JS object by calling the global JSON.stringify on it and
// returns the result coerced to a string.
static std::string json_stringify(const Napi::Object &obj) {
  Napi::Env env = obj.Env();
  Napi::Object json_ns = env.Global().Get("JSON").As<Napi::Object>();
  Napi::Function stringify_fn = json_ns.Get("stringify").As<Napi::Function>();
  // JSON is passed as the receiver, mirroring a plain JSON.stringify(obj) call.
  Napi::Value serialized = stringify_fn.Call(json_ns, { obj });
  return serialized.As<Napi::String>().ToString().Utf8Value();
}
|
|
30
|
+
|
|
31
|
+
// Prints `message` through the JS global console.log.
static void console_log(Napi::Env env, const std::string& message) {
  Napi::Object console_obj = env.Global().Get("console").As<Napi::Object>();
  Napi::Function log_fn = console_obj.Get("log").As<Napi::Function>();
  Napi::String text = Napi::String::New(env, message);
  log_fn.Call({ text });
}
|
|
35
|
+
|
|
18
36
|
template <typename T>
|
|
19
37
|
constexpr T get_option(const Napi::Object &options, const std::string &name,
|
|
20
38
|
const T default_value) {
|
|
21
|
-
if (options.Has(name) && !options.Get(name)
|
|
22
|
-
!options.Get(name).IsNull()) {
|
|
39
|
+
if (options.Has(name) && !is_nil(options.Get(name))) {
|
|
23
40
|
if constexpr (std::is_same<T, std::string>::value) {
|
|
24
41
|
return options.Get(name).ToString().operator T();
|
|
25
42
|
} else if constexpr (std::is_same<T, int32_t>::value ||
|