@fugood/llama.node 1.4.15 → 1.6.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +1 -5
- package/lib/index.js +2 -2
- package/lib/index.ts +2 -2
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +76 -61
- package/src/LlamaContext.cpp +20 -32
- package/src/llama.cpp/common/CMakeLists.txt +12 -0
- package/src/llama.cpp/common/arg.cpp +20 -0
- package/src/llama.cpp/common/chat-parser.cpp +3 -3
- package/src/llama.cpp/common/chat-parser.h +4 -4
- package/src/llama.cpp/common/chat.cpp +289 -34
- package/src/llama.cpp/common/chat.h +32 -20
- package/src/llama.cpp/common/common.cpp +0 -1
- package/src/llama.cpp/common/common.h +31 -25
- package/src/llama.cpp/common/download.cpp +19 -14
- package/src/llama.cpp/common/jinja/caps.cpp +237 -0
- package/src/llama.cpp/common/jinja/caps.h +24 -0
- package/src/llama.cpp/common/jinja/lexer.cpp +341 -0
- package/src/llama.cpp/common/jinja/lexer.h +157 -0
- package/src/llama.cpp/common/jinja/parser.cpp +591 -0
- package/src/llama.cpp/common/jinja/parser.h +21 -0
- package/src/llama.cpp/common/jinja/runtime.cpp +865 -0
- package/src/llama.cpp/common/jinja/runtime.h +628 -0
- package/src/llama.cpp/common/jinja/string.cpp +207 -0
- package/src/llama.cpp/common/jinja/string.h +58 -0
- package/src/llama.cpp/common/jinja/utils.h +49 -0
- package/src/llama.cpp/common/jinja/value.cpp +1221 -0
- package/src/llama.cpp/common/jinja/value.h +464 -0
- package/src/llama.cpp/common/json-partial.h +1 -0
- package/src/llama.cpp/common/sampling.cpp +52 -19
- package/src/llama.cpp/ggml/include/ggml.h +39 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -37
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +31 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +18 -0
- package/src/llama.cpp/include/llama-cpp.h +3 -1
- package/src/llama.cpp/include/llama.h +29 -2
- package/src/llama.cpp/src/llama-adapter.cpp +7 -13
- package/src/llama.cpp/src/llama-adapter.h +1 -3
- package/src/llama.cpp/src/llama-context.cpp +232 -144
- package/src/llama.cpp/src/llama-context.h +10 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-hparams.cpp +0 -36
- package/src/llama.cpp/src/llama-hparams.h +38 -1
- package/src/llama.cpp/src/llama-kv-cache.cpp +201 -59
- package/src/llama.cpp/src/llama-kv-cache.h +0 -2
- package/src/llama.cpp/src/llama-mmap.cpp +5 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +21 -7
- package/src/llama.cpp/src/llama-model.cpp +5 -1
- package/src/llama.cpp/src/llama-model.h +3 -2
- package/src/llama.cpp/src/llama-sampling.cpp +170 -13
package/lib/binding.ts
CHANGED
|
@@ -374,24 +374,20 @@ export type ModelInfo = {
|
|
|
374
374
|
is_hybrid: boolean
|
|
375
375
|
chatTemplates: {
|
|
376
376
|
llamaChat: boolean
|
|
377
|
-
|
|
377
|
+
jinja: {
|
|
378
378
|
default: boolean
|
|
379
379
|
defaultCaps: {
|
|
380
380
|
tools: boolean
|
|
381
381
|
toolCalls: boolean
|
|
382
|
-
toolResponses: boolean
|
|
383
382
|
systemRole: boolean
|
|
384
383
|
parallelToolCalls: boolean
|
|
385
|
-
toolCallId: boolean
|
|
386
384
|
}
|
|
387
385
|
toolUse: boolean
|
|
388
386
|
toolUseCaps?: {
|
|
389
387
|
tools: boolean
|
|
390
388
|
toolCalls: boolean
|
|
391
|
-
toolResponses: boolean
|
|
392
389
|
systemRole: boolean
|
|
393
390
|
parallelToolCalls: boolean
|
|
394
|
-
toolCallId: boolean
|
|
395
391
|
}
|
|
396
392
|
}
|
|
397
393
|
}
|
package/lib/index.js
CHANGED
|
@@ -80,8 +80,8 @@ class LlamaContextWrapper {
|
|
|
80
80
|
return this.ctx.getUsedDevices();
|
|
81
81
|
}
|
|
82
82
|
isJinjaSupported() {
|
|
83
|
-
const {
|
|
84
|
-
return !!(
|
|
83
|
+
const { jinja } = this.ctx.getModelInfo().chatTemplates;
|
|
84
|
+
return !!(jinja === null || jinja === void 0 ? void 0 : jinja.toolUse) || !!(jinja === null || jinja === void 0 ? void 0 : jinja.default);
|
|
85
85
|
}
|
|
86
86
|
isLlamaChatSupported() {
|
|
87
87
|
return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
|
package/lib/index.ts
CHANGED
|
@@ -100,8 +100,8 @@ class LlamaContextWrapper {
|
|
|
100
100
|
}
|
|
101
101
|
|
|
102
102
|
isJinjaSupported(): boolean {
|
|
103
|
-
const {
|
|
104
|
-
return !!
|
|
103
|
+
const { jinja } = this.ctx.getModelInfo().chatTemplates
|
|
104
|
+
return !!jinja?.toolUse || !!jinja?.default
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
isLlamaChatSupported(): boolean {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/llama.node",
|
|
3
3
|
"access": "public",
|
|
4
|
-
"version": "1.
|
|
4
|
+
"version": "1.6.0-rc.0",
|
|
5
5
|
"description": "An another Node binding of llama.cpp",
|
|
6
6
|
"main": "lib/index.js",
|
|
7
7
|
"scripts": {
|
|
@@ -72,20 +72,20 @@
|
|
|
72
72
|
"CMakeLists.txt"
|
|
73
73
|
],
|
|
74
74
|
"optionalDependencies": {
|
|
75
|
-
"@fugood/node-llama-darwin-arm64": "1.
|
|
76
|
-
"@fugood/node-llama-darwin-x64": "1.
|
|
77
|
-
"@fugood/node-llama-linux-arm64": "1.
|
|
78
|
-
"@fugood/node-llama-linux-arm64-cuda": "1.
|
|
79
|
-
"@fugood/node-llama-linux-arm64-snapdragon": "1.
|
|
80
|
-
"@fugood/node-llama-linux-arm64-vulkan": "1.
|
|
81
|
-
"@fugood/node-llama-linux-x64": "1.
|
|
82
|
-
"@fugood/node-llama-linux-x64-cuda": "1.
|
|
83
|
-
"@fugood/node-llama-linux-x64-vulkan": "1.
|
|
84
|
-
"@fugood/node-llama-win32-arm64": "1.
|
|
85
|
-
"@fugood/node-llama-win32-arm64-vulkan": "1.
|
|
86
|
-
"@fugood/node-llama-win32-x64": "1.
|
|
87
|
-
"@fugood/node-llama-win32-x64-cuda": "1.
|
|
88
|
-
"@fugood/node-llama-win32-x64-vulkan": "1.
|
|
75
|
+
"@fugood/node-llama-darwin-arm64": "1.6.0-rc.0",
|
|
76
|
+
"@fugood/node-llama-darwin-x64": "1.6.0-rc.0",
|
|
77
|
+
"@fugood/node-llama-linux-arm64": "1.6.0-rc.0",
|
|
78
|
+
"@fugood/node-llama-linux-arm64-cuda": "1.6.0-rc.0",
|
|
79
|
+
"@fugood/node-llama-linux-arm64-snapdragon": "1.6.0-rc.0",
|
|
80
|
+
"@fugood/node-llama-linux-arm64-vulkan": "1.6.0-rc.0",
|
|
81
|
+
"@fugood/node-llama-linux-x64": "1.6.0-rc.0",
|
|
82
|
+
"@fugood/node-llama-linux-x64-cuda": "1.6.0-rc.0",
|
|
83
|
+
"@fugood/node-llama-linux-x64-vulkan": "1.6.0-rc.0",
|
|
84
|
+
"@fugood/node-llama-win32-arm64": "1.6.0-rc.0",
|
|
85
|
+
"@fugood/node-llama-win32-arm64-vulkan": "1.6.0-rc.0",
|
|
86
|
+
"@fugood/node-llama-win32-x64": "1.6.0-rc.0",
|
|
87
|
+
"@fugood/node-llama-win32-x64-cuda": "1.6.0-rc.0",
|
|
88
|
+
"@fugood/node-llama-win32-x64-vulkan": "1.6.0-rc.0"
|
|
89
89
|
},
|
|
90
90
|
"devDependencies": {
|
|
91
91
|
"@babel/preset-env": "^7.24.4",
|
package/scripts/llama.cpp.patch
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
|
|
2
|
-
index
|
|
2
|
+
index ae02c0bd7..f74d8bb26 100644
|
|
3
3
|
--- a/src/llama.cpp/common/CMakeLists.txt
|
|
4
4
|
+++ b/src/llama.cpp/common/CMakeLists.txt
|
|
5
|
-
@@ -
|
|
5
|
+
@@ -158,4 +158,11 @@ if (LLAMA_LLGUIDANCE)
|
|
6
6
|
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
|
|
7
7
|
endif ()
|
|
8
8
|
|
|
@@ -29,74 +29,89 @@ index 1bcba9cd8..b7cd68734 100644
|
|
|
29
29
|
static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
|
|
30
30
|
int count = 0;
|
|
31
31
|
diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
|
|
32
|
-
index
|
|
32
|
+
index b29544dac..5fa2c6c17 100644
|
|
33
33
|
--- a/src/llama.cpp/common/chat.cpp
|
|
34
34
|
+++ b/src/llama.cpp/common/chat.cpp
|
|
35
|
-
@@ -
|
|
36
|
-
|
|
37
|
-
#include "regex-partial.h"
|
|
38
|
-
|
|
39
|
-
-#include <minja/chat-template.hpp>
|
|
40
|
-
-#include <minja/minja.hpp>
|
|
41
|
-
-
|
|
42
|
-
#include <algorithm>
|
|
43
|
-
#include <cstdio>
|
|
44
|
-
#include <cctype>
|
|
45
|
-
@@ -135,16 +132,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
|
|
46
|
-
return diffs;
|
|
35
|
+
@@ -615,6 +615,37 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
|
|
36
|
+
return tmpls->template_default->source();
|
|
47
37
|
}
|
|
48
38
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
39
|
+
+common_chat_template_caps common_chat_templates_get_caps(const struct common_chat_templates * tmpls, const std::string & variant) {
|
|
40
|
+
+ common_chat_template_caps result;
|
|
41
|
+
+ const common_chat_template * tmpl = nullptr;
|
|
42
|
+
+
|
|
43
|
+
+ if (!variant.empty() && variant == "tool_use") {
|
|
44
|
+
+ tmpl = tmpls->template_tool_use.get();
|
|
45
|
+
+ } else {
|
|
46
|
+
+ tmpl = tmpls->template_default.get();
|
|
47
|
+
+ }
|
|
48
|
+
+
|
|
49
|
+
+ if (tmpl) {
|
|
50
|
+
+ auto caps = tmpl->original_caps();
|
|
51
|
+
+ result.supports_tools = caps.supports_tools;
|
|
52
|
+
+ result.supports_tool_calls = caps.supports_tool_calls;
|
|
53
|
+
+ result.supports_system_role = caps.supports_system_role;
|
|
54
|
+
+ result.supports_parallel_tool_calls = caps.supports_parallel_tool_calls;
|
|
55
|
+
+ }
|
|
56
|
+
+
|
|
57
|
+
+ return result;
|
|
58
|
+
+}
|
|
59
|
+
+
|
|
60
|
+
+bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant) {
|
|
61
|
+
+ if (variant.empty() || variant == "default") {
|
|
62
|
+
+ return tmpls->template_default != nullptr;
|
|
63
|
+
+ }
|
|
64
|
+
+ if (variant == "tool_use") {
|
|
65
|
+
+ return tmpls->template_tool_use != nullptr;
|
|
66
|
+
+ }
|
|
67
|
+
+ return false;
|
|
68
|
+
+}
|
|
69
|
+
+
|
|
70
|
+
common_chat_templates_ptr common_chat_templates_init(
|
|
71
|
+
const struct llama_model * model,
|
|
72
|
+
const std::string & chat_template_override,
|
|
73
|
+
@@ -831,8 +862,9 @@ static std::string apply(
|
|
74
|
+
if (inputs.add_generation_prompt) {
|
|
75
|
+
inp["add_generation_prompt"] = true;
|
|
76
|
+
}
|
|
77
|
+
- if (inp["tools"].is_null()) {
|
|
78
|
+
- inp["tools"] = json::array();
|
|
79
|
+
+ // Remove tools key when null, so templates can check "{% if tools is defined %}"
|
|
80
|
+
+ if (inp["tools"].is_null() || (inp["tools"].is_array() && inp["tools"].empty())) {
|
|
81
|
+
+ inp.erase("tools");
|
|
64
82
|
}
|
|
65
|
-
// TODO: add flag to control date/time, if only for testing purposes.
|
|
66
|
-
- // tmpl_inputs.now = std::chrono::system_clock::now();
|
|
67
|
-
+ tmpl_inputs.now = inputs.now;
|
|
68
83
|
|
|
69
|
-
|
|
70
|
-
// To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
|
|
84
|
+
jinja::global_from_json(ctx, inp, inputs.mark_input);
|
|
71
85
|
diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
|
|
72
|
-
index
|
|
86
|
+
index ac19348ec..f6f9f612f 100644
|
|
73
87
|
--- a/src/llama.cpp/common/chat.h
|
|
74
88
|
+++ b/src/llama.cpp/common/chat.h
|
|
75
|
-
@@ -
|
|
76
|
-
|
|
77
|
-
|
|
89
|
+
@@ -231,6 +231,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
|
|
90
|
+
|
|
91
|
+
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
|
|
78
92
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
93
|
+
+// Template capabilities structure (for exposing capabilities to external code)
|
|
94
|
+
+struct common_chat_template_caps {
|
|
95
|
+
+ bool supports_tools = true;
|
|
96
|
+
+ bool supports_tool_calls = true;
|
|
97
|
+
+ bool supports_system_role = true;
|
|
98
|
+
+ bool supports_parallel_tool_calls = true;
|
|
99
|
+
+};
|
|
82
100
|
+
|
|
83
|
-
|
|
101
|
+
+// Get template capabilities for a specific variant ("" for default, "tool_use" for tool_use template)
|
|
102
|
+
+common_chat_template_caps common_chat_templates_get_caps(const struct common_chat_templates * tmpls, const std::string & variant = "");
|
|
84
103
|
+
|
|
85
|
-
|
|
86
|
-
+
|
|
87
|
-
+
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
+};
|
|
92
|
-
|
|
93
|
-
struct common_chat_tool_call {
|
|
94
|
-
std::string name;
|
|
104
|
+
+// Check if a template variant exists
|
|
105
|
+
+bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant);
|
|
106
|
+
+
|
|
107
|
+
// Parses a JSON array of messages in OpenAI's chat completion API format.
|
|
108
|
+
// T can be std::string containing JSON or nlohmann::ordered_json
|
|
109
|
+
template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
|
|
95
110
|
diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
|
|
96
|
-
index
|
|
111
|
+
index 26250abb6..72ceddcc7 100644
|
|
97
112
|
--- a/src/llama.cpp/common/common.cpp
|
|
98
113
|
+++ b/src/llama.cpp/common/common.cpp
|
|
99
|
-
@@ -
|
|
114
|
+
@@ -1360,6 +1360,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
|
100
115
|
mparams.devices = params.devices.data();
|
|
101
116
|
}
|
|
102
117
|
|
|
@@ -105,10 +120,10 @@ index 744f0b4ee..04fcebb9e 100644
|
|
|
105
120
|
mparams.main_gpu = params.main_gpu;
|
|
106
121
|
mparams.split_mode = params.split_mode;
|
|
107
122
|
diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
|
|
108
|
-
index
|
|
123
|
+
index 96c990c05..c0b0b3093 100644
|
|
109
124
|
--- a/src/llama.cpp/common/common.h
|
|
110
125
|
+++ b/src/llama.cpp/common/common.h
|
|
111
|
-
@@ -
|
|
126
|
+
@@ -317,6 +317,7 @@ struct lr_opt {
|
|
112
127
|
struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
|
|
113
128
|
|
|
114
129
|
struct common_params {
|
|
@@ -130,10 +145,10 @@ index 7622d0bf4..d2edcfddb 100644
|
|
|
130
145
|
check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
|
|
131
146
|
if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
|
|
132
147
|
diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
|
|
133
|
-
index
|
|
148
|
+
index 5b835c11c..681c00504 100644
|
|
134
149
|
--- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
|
|
135
150
|
+++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
|
|
136
|
-
@@ -
|
|
151
|
+
@@ -2819,9 +2819,24 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
|
|
137
152
|
GGML_UNUSED(dev);
|
|
138
153
|
}
|
|
139
154
|
|
|
@@ -160,7 +175,7 @@ index 365a24b49..83bf4ee62 100644
|
|
|
160
175
|
*total = *free;
|
|
161
176
|
|
|
162
177
|
GGML_UNUSED(dev);
|
|
163
|
-
@@ -
|
|
178
|
+
@@ -3056,10 +3071,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
|
|
164
179
|
}
|
|
165
180
|
}
|
|
166
181
|
|
|
@@ -182,7 +197,7 @@ index 365a24b49..83bf4ee62 100644
|
|
|
182
197
|
|
|
183
198
|
GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
|
|
184
199
|
|
|
185
|
-
@@ -
|
|
200
|
+
@@ -3072,6 +3094,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
|
|
186
201
|
} catch (const std::exception & exc) {
|
|
187
202
|
GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
|
|
188
203
|
devices[i].context = nullptr;
|
package/src/LlamaContext.cpp
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include "DecodeAudioTokenWorker.h"
|
|
11
11
|
#include "ggml.h"
|
|
12
12
|
#include "gguf.h"
|
|
13
|
+
#include "chat.h"
|
|
13
14
|
#include "json-schema-to-grammar.h"
|
|
14
15
|
#include <nlohmann/json.hpp>
|
|
15
16
|
#include "llama-impl.h"
|
|
@@ -600,40 +601,27 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
|
|
|
600
601
|
|
|
601
602
|
Napi::Object chatTemplates = Napi::Object::New(info.Env());
|
|
602
603
|
chatTemplates.Set("llamaChat", _rn_ctx->validateModelChatTemplate(false, nullptr));
|
|
603
|
-
Napi::Object
|
|
604
|
-
|
|
604
|
+
Napi::Object jinja = Napi::Object::New(info.Env());
|
|
605
|
+
jinja.Set("default", _rn_ctx->validateModelChatTemplate(true, nullptr));
|
|
605
606
|
Napi::Object defaultCaps = Napi::Object::New(info.Env());
|
|
606
|
-
auto
|
|
607
|
-
|
|
608
|
-
defaultCaps.Set(
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
defaultCaps.Set(
|
|
616
|
-
"systemRole",
|
|
617
|
-
default_tmpl_caps.supports_system_role);
|
|
618
|
-
defaultCaps.Set("parallelToolCalls", default_tmpl_caps.supports_parallel_tool_calls);
|
|
619
|
-
defaultCaps.Set("toolCallId", default_tmpl_caps.supports_tool_call_id);
|
|
620
|
-
minja.Set("defaultCaps", defaultCaps);
|
|
621
|
-
minja.Set("toolUse", _rn_ctx->validateModelChatTemplate(true, "tool_use"));
|
|
622
|
-
if (_rn_ctx->validateModelChatTemplate(true, "tool_use")) {
|
|
607
|
+
auto default_caps = common_chat_templates_get_caps(_rn_ctx->templates.get(), "");
|
|
608
|
+
defaultCaps.Set("tools", default_caps.supports_tools);
|
|
609
|
+
defaultCaps.Set("toolCalls", default_caps.supports_tool_calls);
|
|
610
|
+
defaultCaps.Set("systemRole", default_caps.supports_system_role);
|
|
611
|
+
defaultCaps.Set("parallelToolCalls", default_caps.supports_parallel_tool_calls);
|
|
612
|
+
jinja.Set("defaultCaps", defaultCaps);
|
|
613
|
+
bool hasToolUse = common_chat_templates_has_variant(_rn_ctx->templates.get(), "tool_use");
|
|
614
|
+
jinja.Set("toolUse", hasToolUse);
|
|
615
|
+
if (hasToolUse) {
|
|
623
616
|
Napi::Object toolUseCaps = Napi::Object::New(info.Env());
|
|
624
|
-
auto
|
|
625
|
-
|
|
626
|
-
toolUseCaps.Set(
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
toolUseCaps.Set("parallelToolCalls", tool_use_tmpl_caps.supports_parallel_tool_calls);
|
|
633
|
-
toolUseCaps.Set("toolCallId", tool_use_tmpl_caps.supports_tool_call_id);
|
|
634
|
-
minja.Set("toolUseCaps", toolUseCaps);
|
|
635
|
-
}
|
|
636
|
-
chatTemplates.Set("minja", minja);
|
|
617
|
+
auto tool_use_caps = common_chat_templates_get_caps(_rn_ctx->templates.get(), "tool_use");
|
|
618
|
+
toolUseCaps.Set("tools", tool_use_caps.supports_tools);
|
|
619
|
+
toolUseCaps.Set("toolCalls", tool_use_caps.supports_tool_calls);
|
|
620
|
+
toolUseCaps.Set("systemRole", tool_use_caps.supports_system_role);
|
|
621
|
+
toolUseCaps.Set("parallelToolCalls", tool_use_caps.supports_parallel_tool_calls);
|
|
622
|
+
jinja.Set("toolUseCaps", toolUseCaps);
|
|
623
|
+
}
|
|
624
|
+
chatTemplates.Set("jinja", jinja);
|
|
637
625
|
details.Set("chatTemplates", chatTemplates);
|
|
638
626
|
|
|
639
627
|
details.Set("metadata", metadata);
|
|
@@ -85,6 +85,18 @@ add_library(${TARGET} STATIC
|
|
|
85
85
|
speculative.h
|
|
86
86
|
unicode.cpp
|
|
87
87
|
unicode.h
|
|
88
|
+
jinja/lexer.cpp
|
|
89
|
+
jinja/lexer.h
|
|
90
|
+
jinja/parser.cpp
|
|
91
|
+
jinja/parser.h
|
|
92
|
+
jinja/runtime.cpp
|
|
93
|
+
jinja/runtime.h
|
|
94
|
+
jinja/value.cpp
|
|
95
|
+
jinja/value.h
|
|
96
|
+
jinja/string.cpp
|
|
97
|
+
jinja/string.h
|
|
98
|
+
jinja/caps.cpp
|
|
99
|
+
jinja/caps.h
|
|
88
100
|
)
|
|
89
101
|
|
|
90
102
|
target_include_directories(${TARGET} PUBLIC . ../vendor)
|
|
@@ -1729,6 +1729,26 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1729
1729
|
}
|
|
1730
1730
|
}
|
|
1731
1731
|
).set_sparam());
|
|
1732
|
+
add_opt(common_arg(
|
|
1733
|
+
{"--adaptive-target"}, "N",
|
|
1734
|
+
string_format("adaptive-p: select tokens near this probability (valid range 0.0 "
|
|
1735
|
+
"to 1.0; negative = disabled) (default: %.2f)\n"
|
|
1736
|
+
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)",
|
|
1737
|
+
(double)params.sampling.adaptive_target),
|
|
1738
|
+
[](common_params & params, const std::string & value) {
|
|
1739
|
+
params.sampling.adaptive_target = std::stof(value);
|
|
1740
|
+
}
|
|
1741
|
+
).set_sparam());
|
|
1742
|
+
add_opt(common_arg(
|
|
1743
|
+
{"--adaptive-decay"}, "N",
|
|
1744
|
+
string_format("adaptive-p: decay rate for target adaptation over time. lower values "
|
|
1745
|
+
"are more reactive, higher values are more stable.\n"
|
|
1746
|
+
"(valid range 0.0 to 0.99) (default: %.2f)",
|
|
1747
|
+
(double)params.sampling.adaptive_decay),
|
|
1748
|
+
[](common_params & params, const std::string & value) {
|
|
1749
|
+
params.sampling.adaptive_decay = std::stof(value);
|
|
1750
|
+
}
|
|
1751
|
+
).set_sparam());
|
|
1732
1752
|
add_opt(common_arg(
|
|
1733
1753
|
{"--dynatemp-range"}, "N",
|
|
1734
1754
|
string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sampling.dynatemp_range),
|
|
@@ -129,7 +129,7 @@ static void parse_json_tool_calls(
|
|
|
129
129
|
}
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
-
common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const
|
|
132
|
+
common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
|
|
133
133
|
: input_(input), is_partial_(is_partial), syntax_(syntax)
|
|
134
134
|
{
|
|
135
135
|
result_.role = "assistant";
|
|
@@ -1611,7 +1611,7 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
|
|
|
1611
1611
|
builder.finish();
|
|
1612
1612
|
}
|
|
1613
1613
|
|
|
1614
|
-
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const
|
|
1614
|
+
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
|
|
1615
1615
|
if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
|
|
1616
1616
|
syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
|
|
1617
1617
|
syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
|
|
@@ -1635,7 +1635,7 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
|
|
|
1635
1635
|
return msg;
|
|
1636
1636
|
}
|
|
1637
1637
|
|
|
1638
|
-
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const
|
|
1638
|
+
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
|
|
1639
1639
|
if (parser.empty()) {
|
|
1640
1640
|
throw std::runtime_error("Failed to parse due to missing parser definition.");
|
|
1641
1641
|
}
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
#include "json-partial.h"
|
|
6
6
|
#include "regex-partial.h"
|
|
7
7
|
|
|
8
|
-
#include <nlohmann/
|
|
8
|
+
#include <nlohmann/json_fwd.hpp>
|
|
9
9
|
|
|
10
10
|
#include <optional>
|
|
11
11
|
#include <string>
|
|
@@ -19,20 +19,20 @@ class common_chat_msg_partial_exception : public std::runtime_error {
|
|
|
19
19
|
class common_chat_msg_parser {
|
|
20
20
|
std::string input_;
|
|
21
21
|
bool is_partial_;
|
|
22
|
-
|
|
22
|
+
common_chat_parser_params syntax_; // TODO: rename to params
|
|
23
23
|
std::string healing_marker_;
|
|
24
24
|
|
|
25
25
|
size_t pos_ = 0;
|
|
26
26
|
common_chat_msg result_;
|
|
27
27
|
|
|
28
28
|
public:
|
|
29
|
-
common_chat_msg_parser(const std::string & input, bool is_partial, const
|
|
29
|
+
common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
|
|
30
30
|
const std::string & input() const { return input_; }
|
|
31
31
|
size_t pos() const { return pos_; }
|
|
32
32
|
const std::string & healing_marker() const { return healing_marker_; }
|
|
33
33
|
const bool & is_partial() const { return is_partial_; }
|
|
34
34
|
const common_chat_msg & result() const { return result_; }
|
|
35
|
-
const
|
|
35
|
+
const common_chat_parser_params & syntax() const { return syntax_; }
|
|
36
36
|
|
|
37
37
|
void move_to(size_t pos) {
|
|
38
38
|
if (pos > input_.size()) {
|