@fugood/llama.node 0.3.12 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +2 -1
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +14 -0
- package/src/LlamaContext.cpp +110 -79
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +95 -13
- package/src/llama.cpp/.github/workflows/docker.yml +2 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +23 -6
- package/src/llama.cpp/common/arg.cpp +292 -14
- package/src/llama.cpp/common/chat.cpp +1128 -315
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +27 -171
- package/src/llama.cpp/common/common.h +41 -73
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/llguidance.cpp +3 -3
- package/src/llama.cpp/common/log.cpp +1 -0
- package/src/llama.cpp/common/log.h +2 -1
- package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +21 -7
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +61 -14
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +93 -49
- package/src/llama.cpp/common/speculative.cpp +6 -5
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +47 -9
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +4 -4
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
- package/src/llama.cpp/examples/main/main.cpp +73 -28
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +115 -79
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/server/httplib.h +381 -292
- package/src/llama.cpp/examples/server/server.cpp +134 -128
- package/src/llama.cpp/examples/server/utils.hpp +95 -106
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +251 -142
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +4 -1
- package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
- package/src/llama.cpp/ggml/include/ggml.h +6 -2
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +156 -11
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +2235 -641
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1572 -198
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +24 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +16 -3
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +246 -120
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +174 -728
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +949 -602
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +37 -3
- package/src/llama.cpp/ggml/src/ggml.c +9 -4
- package/src/llama.cpp/include/llama.h +32 -14
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +21 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +183 -183
- package/src/llama.cpp/src/llama-grammar.h +13 -4
- package/src/llama.cpp/src/llama-impl.h +6 -6
- package/src/llama.cpp/src/llama-kv-cache.h +2 -1
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-mmap.h +1 -0
- package/src/llama.cpp/src/llama-model.cpp +70 -6
- package/src/llama.cpp/src/llama-sampling.cpp +174 -67
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +154 -5
- package/src/llama.cpp/src/unicode.cpp +9 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +171 -115
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +691 -325
- package/src/llama.cpp/tests/test-gguf.cpp +4 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/tests/test-sampling.cpp +15 -0
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -52
package/bin/* (all 16 prebuilt binaries listed above, including the platform llama-node.node files and the Windows node.lib files)
CHANGED
Binary files
package/lib/binding.ts
CHANGED

@@ -8,6 +8,7 @@ export type ChatMessage = {
 export type LlamaModelOptions = {
   model: string
   chat_template?: string
+  reasoning_format?: string
   embedding?: boolean
   embd_normalize?: number
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -86,7 +87,7 @@ export type LlamaCompletionOptions = {
   stop?: string[]
   grammar?: string
   grammar_lazy?: boolean
-  grammar_triggers?: { word: string; at_start: boolean }[]
+  grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
 }
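A short TypeScript sketch of the two changed option shapes. The field names come from binding.ts above; the import path and the concrete trigger `type` number are assumptions added for illustration.

// Sketch only — assumes these types are re-exported from the package root.
import type { LlamaModelOptions, LlamaCompletionOptions } from '@fugood/llama.node'

const modelOptions: LlamaModelOptions = {
  model: './model.gguf',
  // New model option: "deepseek" turns on reasoning extraction; any other value means none.
  reasoning_format: 'deepseek',
}

const completionOptions: Partial<LlamaCompletionOptions> = {
  grammar_lazy: true,
  preserved_tokens: ['<tool_call>'],
  // Trigger objects now carry a numeric `type` (a llama.cpp common_grammar_trigger_type
  // value; 1 is only a placeholder here). Note that the native Completion handler reads
  // the trigger string from a `value` key (see LlamaContext.cpp below), while this
  // declaration names it `word`.
  grammar_triggers: [{ type: 1, word: '<tool_call>', at_start: false }],
}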
package/package.json
CHANGED

package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -165,9 +165,17 @@ void LlamaCompletionWorker::OnOK() {
              Napi::String::New(env, _result.text.c_str()));
 
   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
+  std::string * reasoning_content = nullptr;
+  std::string * content = nullptr;
   if (!_stop) {
     try {
       common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
+      if (!message.reasoning_content.empty()) {
+        reasoning_content = &message.reasoning_content;
+      }
+      if (!message.content.empty()) {
+        content = &message.content;
+      }
       for (size_t i = 0; i < message.tool_calls.size(); i++) {
         const auto &tc = message.tool_calls[i];
         Napi::Object tool_call = Napi::Object::New(env);
@@ -188,6 +196,12 @@ void LlamaCompletionWorker::OnOK() {
   if (tool_calls.Length() > 0) {
     result.Set("tool_calls", tool_calls);
   }
+  if (reasoning_content) {
+    result.Set("reasoning_content", Napi::String::New(env, reasoning_content->c_str()));
+  }
+  if (content) {
+    result.Set("content", Napi::String::New(env, content->c_str()));
+  }
 
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
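On the JavaScript side, a completion result can now carry the parsed message parts separately. A hedged sketch of consuming them follows; `context.completion(...)` stands in for the package's completion API, while `reasoning_content` and `content` are the result fields added above and `text`/`tool_calls` are assumed to behave as in previous releases.

// Sketch only — `context` stands in for a loaded llama.node context.
declare const context: { completion(options: object): Promise<any> }

async function run() {
  const result = await context.completion({
    messages: [{ role: 'user', content: 'Hello' }],
  })

  console.log(result.text)                // raw generated text, as before
  if (result.reasoning_content) {
    console.log(result.reasoning_content) // parsed reasoning content (new in 0.3.14)
  }
  if (result.content) {
    console.log(result.content)           // parsed message content (new in 0.3.14)
  }
  for (const call of result.tool_calls ?? []) {
    console.log(call)                     // parsed tool calls, unchanged
  }
}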
package/src/LlamaContext.cpp
CHANGED

@@ -185,6 +185,13 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   params.chat_template = get_option<std::string>(options, "chat_template", "");
 
+  std::string reasoning_format = get_option<std::string>(options, "reasoning_format", "none");
+  if (reasoning_format == "deepseek") {
+    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+  } else {
+    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+  }
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -265,7 +272,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   _sess = sess;
   _info = common_params_get_system_info(params);
 
-  _templates =
+  _templates = common_chat_templates_init(model, params.chat_template);
 }
 
 // getSystemInfo(): string
@@ -348,22 +355,22 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   Napi::Object minja = Napi::Object::New(info.Env());
   minja.Set("default", validateModelChatTemplate(model, true, ""));
   Napi::Object defaultCaps = Napi::Object::New(info.Env());
-  defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
-  defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
-  defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
-  defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
-  defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
-  defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
+  defaultCaps.Set("tools", _templates.get()->template_default->original_caps().supports_tools);
+  defaultCaps.Set("toolCalls", _templates.get()->template_default->original_caps().supports_tool_calls);
+  defaultCaps.Set("toolResponses", _templates.get()->template_default->original_caps().supports_tool_responses);
+  defaultCaps.Set("systemRole", _templates.get()->template_default->original_caps().supports_system_role);
+  defaultCaps.Set("parallelToolCalls", _templates.get()->template_default->original_caps().supports_parallel_tool_calls);
+  defaultCaps.Set("toolCallId", _templates.get()->template_default->original_caps().supports_tool_call_id);
   minja.Set("defaultCaps", defaultCaps);
   minja.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
-  if (_templates.template_tool_use) {
+  if (_templates.get()->template_tool_use) {
     Napi::Object toolUseCaps = Napi::Object::New(info.Env());
-    toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
-    toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
-    toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
-    toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
-    toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
-    toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
+    toolUseCaps.Set("tools", _templates.get()->template_tool_use->original_caps().supports_tools);
+    toolUseCaps.Set("toolCalls", _templates.get()->template_tool_use->original_caps().supports_tool_calls);
+    toolUseCaps.Set("toolResponses", _templates.get()->template_tool_use->original_caps().supports_tool_responses);
+    toolUseCaps.Set("systemRole", _templates.get()->template_tool_use->original_caps().supports_system_role);
+    toolUseCaps.Set("parallelToolCalls", _templates.get()->template_tool_use->original_caps().supports_parallel_tool_calls);
+    toolUseCaps.Set("toolCallId", _templates.get()->template_tool_use->original_caps().supports_tool_call_id);
     minja.Set("toolUseCaps", toolUseCaps);
   }
   chatTemplates.Set("minja", minja);
@@ -377,8 +384,8 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
 }
 
 common_chat_params getFormattedChatWithJinja(
-    const
-    const
+    const std::shared_ptr<LlamaSession> &sess,
+    const common_chat_templates_ptr &templates,
     const std::string &messages,
     const std::string &chat_template,
     const std::string &json_schema,
@@ -386,71 +393,46 @@ common_chat_params getFormattedChatWithJinja(
     const bool &parallel_tool_calls,
     const std::string &tool_choice
 ) {
-
-  inputs.messages = json::parse(messages);
+  common_chat_templates_inputs inputs;
+  inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
   auto useTools = !tools.empty();
   if (useTools) {
-
+    inputs.tools = common_chat_tools_parse_oaicompat(json::parse(tools));
   }
   inputs.parallel_tool_calls = parallel_tool_calls;
   if (!tool_choice.empty()) {
-
+    inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(tool_choice);
   }
   if (!json_schema.empty()) {
-
+    inputs.json_schema = json::parse(json_schema);
   }
-  inputs.
+  inputs.extract_reasoning = sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
-    auto
-
-    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
-      inputs.parallel_tool_calls = false;
-    }
-    return common_chat_params_init(*template_ptr, inputs);
+    auto tmps = common_chat_templates_init(sess->model(), chat_template);
+    return common_chat_templates_apply(tmps.get(), inputs);
   } else {
-
-    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
-      inputs.parallel_tool_calls = false;
-    }
-    return common_chat_params_init(*template_ptr, inputs);
+    return common_chat_templates_apply(templates.get(), inputs);
   }
 }
 
 std::string getFormattedChat(
     const struct llama_model * model,
-    const
+    const common_chat_templates_ptr &templates,
     const std::string &messages,
     const std::string &chat_template
 ) {
-
-
-
-  std::vector<common_chat_msg> chat_msgs;
-  for (const auto &msg : chat_json) {
-    chat_msgs.push_back({
-      msg["role"].get<std::string>(),
-      msg["content"].get<std::string>()
-    });
-  }
+  common_chat_templates_inputs inputs;
+  inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
+  inputs.use_jinja = false;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
-
-
-      *tmp.template_default,
-      chat_msgs,
-      true,
-      false
-    );
+    auto tmps = common_chat_templates_init(model, chat_template);
+    return common_chat_templates_apply(tmps.get(), inputs).prompt;
   } else {
-
-      *templates.template_default,
-      chat_msgs,
-      true,
-      false
-    );
+    return common_chat_templates_apply(templates.get(), inputs).prompt;
   }
 }
 
@@ -493,23 +475,24 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto parallel_tool_calls = get_option<bool>(params, "parallel_tool_calls", false);
   auto tool_choice = get_option<std::string>(params, "tool_choice", "");
 
-  auto chatParams = getFormattedChatWithJinja(_sess
+  auto chatParams = getFormattedChatWithJinja(_sess, _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
 
   Napi::Object result = Napi::Object::New(env);
-  result.Set("prompt", chatParams.prompt
+  result.Set("prompt", chatParams.prompt);
   // chat_format: int
   result.Set("chat_format", static_cast<int>(chatParams.format));
   // grammar: string
   result.Set("grammar", chatParams.grammar);
   // grammar_lazy: boolean
   result.Set("grammea_lazy", chatParams.grammar_lazy);
-  // grammar_triggers: [{
+  // grammar_triggers: [{ value: string, token: number }]
   Napi::Array grammar_triggers = Napi::Array::New(env);
   for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
     const auto & trigger = chatParams.grammar_triggers[i];
     Napi::Object triggerObj = Napi::Object::New(env);
-    triggerObj.Set("
-    triggerObj.Set("
+    triggerObj.Set("type", Napi::Number::New(env, trigger.type));
+    triggerObj.Set("value", Napi::String::New(env, trigger.value));
+    triggerObj.Set("token", Napi::Number::New(env, trigger.token));
     grammar_triggers.Set(i, triggerObj);
   }
   result.Set("grammar_triggers", grammar_triggers);
@@ -586,6 +569,60 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     }
   }
 
+  // Handle preserved_tokens from options
+  if (options.Has("preserved_tokens")) {
+    auto preserved_tokens = options.Get("preserved_tokens").As<Napi::Array>();
+    for (size_t i = 0; i < preserved_tokens.Length(); i++) {
+      auto token = preserved_tokens.Get(i).ToString().Utf8Value();
+      auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+      if (ids.size() == 1) {
+        params.sampling.preserved_tokens.insert(ids[0]);
+      }
+    }
+  }
+
+  // Handle grammar_triggers from options
+  if (options.Has("grammar_triggers")) {
+    auto grammar_triggers = options.Get("grammar_triggers").As<Napi::Array>();
+    for (size_t i = 0; i < grammar_triggers.Length(); i++) {
+      auto trigger_obj = grammar_triggers.Get(i).As<Napi::Object>();
+
+      auto type = static_cast<common_grammar_trigger_type>(trigger_obj.Get("type").ToNumber().Int32Value());
+      auto word = trigger_obj.Get("value").ToString().Utf8Value();
+
+      if (type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
+        auto ids = common_tokenize(_sess->context(), word, /* add_special= */ false, /* parse_special= */ true);
+        if (ids.size() == 1) {
+          auto token = ids[0];
+          if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) {
+            throw std::runtime_error("Grammar trigger word should be marked as preserved token");
+          }
+          common_grammar_trigger trigger;
+          trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
+          trigger.value = word;
+          trigger.token = token;
+          params.sampling.grammar_triggers.push_back(std::move(trigger));
+        } else {
+          params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
+        }
+      } else {
+        common_grammar_trigger trigger;
+        trigger.type = type;
+        trigger.value = word;
+        if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
+          auto token = (llama_token) trigger_obj.Get("token").ToNumber().Int32Value();
+          trigger.token = token;
+        }
+        params.sampling.grammar_triggers.push_back(std::move(trigger));
+      }
+    }
+  }
+
+  // Handle grammar_lazy from options
+  if (options.Has("grammar_lazy")) {
+    params.sampling.grammar_lazy = options.Get("grammar_lazy").ToBoolean().Value();
+  }
+
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
     auto chat_template = get_option<std::string>(options, "chat_template", "");
@@ -598,7 +635,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     auto tool_choice = get_option<std::string>(options, "tool_choice", "none");
 
     auto chatParams = getFormattedChatWithJinja(
-      _sess
+      _sess,
       _templates,
       json_stringify(messages),
       chat_template,
@@ -608,33 +645,26 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
       tool_choice
     );
 
-    params.prompt = chatParams.prompt
+    params.prompt = chatParams.prompt;
 
     chat_format = chatParams.format;
 
+    for (const auto & token : chatParams.preserved_tokens) {
+      auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+      if (ids.size() == 1) {
+        params.sampling.preserved_tokens.insert(ids[0]);
+      }
+    }
+
     if (!has_grammar_set) {
       // grammar param always wins jinja template & json_schema
       params.sampling.grammar = chatParams.grammar;
      params.sampling.grammar_lazy = chatParams.grammar_lazy;
-
       for (const auto & trigger : chatParams.grammar_triggers) {
-
-        if (ids.size() == 1) {
-          params.sampling.grammar_trigger_tokens.push_back(ids[0]);
-          params.sampling.preserved_tokens.insert(ids[0]);
-          continue;
-        }
-        params.sampling.grammar_trigger_words.push_back(trigger);
+        params.sampling.grammar_triggers.push_back(trigger);
       }
       has_grammar_set = true;
     }
-
-    for (const auto & token : chatParams.preserved_tokens) {
-      auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
-      if (ids.size() == 1) {
-        params.sampling.preserved_tokens.insert(ids[0]);
-      }
-    }
 
     for (const auto & stop : chatParams.additional_stops) {
       stop_words.push_back(stop);
@@ -685,6 +715,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sampling.dry_base = get_option<float>(options, "dry_base", 2);
   params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
   params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
+  params.sampling.top_n_sigma = get_option<float>(options, "top_n_sigma", -1.0f);
   params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
   params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
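Taken together, the Completion changes let a caller that formats the prompt itself pass the lazy-grammar metadata back in. A rough TypeScript sketch follows; the `getFormattedChat` and `completion` method names and their argument shapes are assumptions based on the native handlers above, not taken from this diff, while the option and result field names come from the hunks shown.

// Sketch only — `context`, method names, and argument shapes are assumed.
declare const context: {
  getFormattedChat(options: object): Promise<any>
  completion(options: object): Promise<any>
}

async function completeWithLazyGrammar(messages: object[], tools: object[]) {
  // A jinja-formatted chat returns the prompt plus lazy-grammar metadata;
  // triggers come back as { type, value, token } objects (see GetFormattedChat above)
  // and are read back under the same keys by the Completion handler.
  const formatted = await context.getFormattedChat({ messages, tools, jinja: true })

  return context.completion({
    prompt: formatted.prompt,
    grammar: formatted.grammar,
    grammar_lazy: true,
    grammar_triggers: formatted.grammar_triggers,
    // Single-token trigger words must also be listed as preserved tokens,
    // otherwise the native handler throws.
    preserved_tokens: ['<tool_call>', '</tool_call>'],
    // New sampling passthrough to params.sampling.top_n_sigma; the default -1.0 leaves it off.
    top_n_sigma: -1,
  })
}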
package/src/LlamaContext.h
CHANGED

@@ -28,7 +28,7 @@ private:
   std::string _info;
   Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
-
+  common_chat_templates_ptr _templates;
   std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
 };