@fugood/llama.node 1.4.15 → 1.5.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +1 -5
- package/lib/index.js +2 -2
- package/lib/index.ts +2 -2
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +76 -61
- package/src/LlamaContext.cpp +20 -32
- package/src/llama.cpp/common/CMakeLists.txt +12 -0
- package/src/llama.cpp/common/arg.cpp +20 -0
- package/src/llama.cpp/common/chat.cpp +289 -34
- package/src/llama.cpp/common/chat.h +16 -13
- package/src/llama.cpp/common/common.cpp +0 -1
- package/src/llama.cpp/common/common.h +28 -25
- package/src/llama.cpp/common/jinja/caps.cpp +237 -0
- package/src/llama.cpp/common/jinja/caps.h +24 -0
- package/src/llama.cpp/common/jinja/lexer.cpp +341 -0
- package/src/llama.cpp/common/jinja/lexer.h +157 -0
- package/src/llama.cpp/common/jinja/parser.cpp +591 -0
- package/src/llama.cpp/common/jinja/parser.h +21 -0
- package/src/llama.cpp/common/jinja/runtime.cpp +865 -0
- package/src/llama.cpp/common/jinja/runtime.h +628 -0
- package/src/llama.cpp/common/jinja/string.cpp +207 -0
- package/src/llama.cpp/common/jinja/string.h +58 -0
- package/src/llama.cpp/common/jinja/utils.h +49 -0
- package/src/llama.cpp/common/jinja/value.cpp +1221 -0
- package/src/llama.cpp/common/jinja/value.h +464 -0
- package/src/llama.cpp/common/sampling.cpp +52 -19
- package/src/llama.cpp/ggml/include/ggml.h +39 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -37
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +31 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +18 -0
- package/src/llama.cpp/include/llama-cpp.h +3 -1
- package/src/llama.cpp/include/llama.h +29 -2
- package/src/llama.cpp/src/llama-adapter.cpp +7 -13
- package/src/llama.cpp/src/llama-adapter.h +1 -3
- package/src/llama.cpp/src/llama-context.cpp +232 -144
- package/src/llama.cpp/src/llama-context.h +10 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-hparams.cpp +0 -36
- package/src/llama.cpp/src/llama-hparams.h +38 -1
- package/src/llama.cpp/src/llama-kv-cache.cpp +201 -59
- package/src/llama.cpp/src/llama-kv-cache.h +0 -2
- package/src/llama.cpp/src/llama-mmap.cpp +5 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +21 -7
- package/src/llama.cpp/src/llama-model.cpp +5 -1
- package/src/llama.cpp/src/llama-model.h +3 -2
- package/src/llama.cpp/src/llama-sampling.cpp +170 -13
package/lib/binding.ts
CHANGED
|
@@ -374,24 +374,20 @@ export type ModelInfo = {
|
|
|
374
374
|
is_hybrid: boolean
|
|
375
375
|
chatTemplates: {
|
|
376
376
|
llamaChat: boolean
|
|
377
|
-
|
|
377
|
+
jinja: {
|
|
378
378
|
default: boolean
|
|
379
379
|
defaultCaps: {
|
|
380
380
|
tools: boolean
|
|
381
381
|
toolCalls: boolean
|
|
382
|
-
toolResponses: boolean
|
|
383
382
|
systemRole: boolean
|
|
384
383
|
parallelToolCalls: boolean
|
|
385
|
-
toolCallId: boolean
|
|
386
384
|
}
|
|
387
385
|
toolUse: boolean
|
|
388
386
|
toolUseCaps?: {
|
|
389
387
|
tools: boolean
|
|
390
388
|
toolCalls: boolean
|
|
391
|
-
toolResponses: boolean
|
|
392
389
|
systemRole: boolean
|
|
393
390
|
parallelToolCalls: boolean
|
|
394
|
-
toolCallId: boolean
|
|
395
391
|
}
|
|
396
392
|
}
|
|
397
393
|
}
|
package/lib/index.js
CHANGED
|
@@ -80,8 +80,8 @@ class LlamaContextWrapper {
|
|
|
80
80
|
return this.ctx.getUsedDevices();
|
|
81
81
|
}
|
|
82
82
|
isJinjaSupported() {
|
|
83
|
-
const {
|
|
84
|
-
return !!(
|
|
83
|
+
const { jinja } = this.ctx.getModelInfo().chatTemplates;
|
|
84
|
+
return !!(jinja === null || jinja === void 0 ? void 0 : jinja.toolUse) || !!(jinja === null || jinja === void 0 ? void 0 : jinja.default);
|
|
85
85
|
}
|
|
86
86
|
isLlamaChatSupported() {
|
|
87
87
|
return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
|
package/lib/index.ts
CHANGED
|
@@ -100,8 +100,8 @@ class LlamaContextWrapper {
|
|
|
100
100
|
}
|
|
101
101
|
|
|
102
102
|
isJinjaSupported(): boolean {
|
|
103
|
-
const {
|
|
104
|
-
return !!
|
|
103
|
+
const { jinja } = this.ctx.getModelInfo().chatTemplates
|
|
104
|
+
return !!jinja?.toolUse || !!jinja?.default
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
isLlamaChatSupported(): boolean {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/llama.node",
|
|
3
3
|
"access": "public",
|
|
4
|
-
"version": "1.4.15",
|
|
4
|
+
"version": "1.5.0-rc.0",
|
|
5
5
|
"description": "An another Node binding of llama.cpp",
|
|
6
6
|
"main": "lib/index.js",
|
|
7
7
|
"scripts": {
|
|
@@ -72,20 +72,20 @@
|
|
|
72
72
|
"CMakeLists.txt"
|
|
73
73
|
],
|
|
74
74
|
"optionalDependencies": {
|
|
75
|
-
"@fugood/node-llama-darwin-arm64": "1.
|
|
76
|
-
"@fugood/node-llama-darwin-x64": "1.
|
|
77
|
-
"@fugood/node-llama-linux-arm64": "1.
|
|
78
|
-
"@fugood/node-llama-linux-arm64-cuda": "1.
|
|
79
|
-
"@fugood/node-llama-linux-arm64-snapdragon": "1.
|
|
80
|
-
"@fugood/node-llama-linux-arm64-vulkan": "1.
|
|
81
|
-
"@fugood/node-llama-linux-x64": "1.
|
|
82
|
-
"@fugood/node-llama-linux-x64-cuda": "1.
|
|
83
|
-
"@fugood/node-llama-linux-x64-vulkan": "1.
|
|
84
|
-
"@fugood/node-llama-win32-arm64": "1.
|
|
85
|
-
"@fugood/node-llama-win32-arm64-vulkan": "1.
|
|
86
|
-
"@fugood/node-llama-win32-x64": "1.
|
|
87
|
-
"@fugood/node-llama-win32-x64-cuda": "1.
|
|
88
|
-
"@fugood/node-llama-win32-x64-vulkan": "1.
|
|
75
|
+
"@fugood/node-llama-darwin-arm64": "1.5.0-rc.0",
|
|
76
|
+
"@fugood/node-llama-darwin-x64": "1.5.0-rc.0",
|
|
77
|
+
"@fugood/node-llama-linux-arm64": "1.5.0-rc.0",
|
|
78
|
+
"@fugood/node-llama-linux-arm64-cuda": "1.5.0-rc.0",
|
|
79
|
+
"@fugood/node-llama-linux-arm64-snapdragon": "1.5.0-rc.0",
|
|
80
|
+
"@fugood/node-llama-linux-arm64-vulkan": "1.5.0-rc.0",
|
|
81
|
+
"@fugood/node-llama-linux-x64": "1.5.0-rc.0",
|
|
82
|
+
"@fugood/node-llama-linux-x64-cuda": "1.5.0-rc.0",
|
|
83
|
+
"@fugood/node-llama-linux-x64-vulkan": "1.5.0-rc.0",
|
|
84
|
+
"@fugood/node-llama-win32-arm64": "1.5.0-rc.0",
|
|
85
|
+
"@fugood/node-llama-win32-arm64-vulkan": "1.5.0-rc.0",
|
|
86
|
+
"@fugood/node-llama-win32-x64": "1.5.0-rc.0",
|
|
87
|
+
"@fugood/node-llama-win32-x64-cuda": "1.5.0-rc.0",
|
|
88
|
+
"@fugood/node-llama-win32-x64-vulkan": "1.5.0-rc.0"
|
|
89
89
|
},
|
|
90
90
|
"devDependencies": {
|
|
91
91
|
"@babel/preset-env": "^7.24.4",
|
package/scripts/llama.cpp.patch
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
|
|
2
|
-
index
|
|
2
|
+
index ae02c0bd7..f74d8bb26 100644
|
|
3
3
|
--- a/src/llama.cpp/common/CMakeLists.txt
|
|
4
4
|
+++ b/src/llama.cpp/common/CMakeLists.txt
|
|
5
|
-
@@ -
|
|
5
|
+
@@ -158,4 +158,11 @@ if (LLAMA_LLGUIDANCE)
|
|
6
6
|
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
|
|
7
7
|
endif ()
|
|
8
8
|
|
|
@@ -29,74 +29,89 @@ index 1bcba9cd8..b7cd68734 100644
|
|
|
29
29
|
static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
|
|
30
30
|
int count = 0;
|
|
31
31
|
diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
|
|
32
|
-
index
|
|
32
|
+
index b29544dac..5fa2c6c17 100644
|
|
33
33
|
--- a/src/llama.cpp/common/chat.cpp
|
|
34
34
|
+++ b/src/llama.cpp/common/chat.cpp
|
|
35
|
-
@@ -
|
|
36
|
-
|
|
37
|
-
#include "regex-partial.h"
|
|
38
|
-
|
|
39
|
-
-#include <minja/chat-template.hpp>
|
|
40
|
-
-#include <minja/minja.hpp>
|
|
41
|
-
-
|
|
42
|
-
#include <algorithm>
|
|
43
|
-
#include <cstdio>
|
|
44
|
-
#include <cctype>
|
|
45
|
-
@@ -135,16 +132,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
|
|
46
|
-
return diffs;
|
|
35
|
+
@@ -615,6 +615,37 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
|
|
36
|
+
return tmpls->template_default->source();
|
|
47
37
|
}
|
|
48
38
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
39
|
+
+common_chat_template_caps common_chat_templates_get_caps(const struct common_chat_templates * tmpls, const std::string & variant) {
|
|
40
|
+
+ common_chat_template_caps result;
|
|
41
|
+
+ const common_chat_template * tmpl = nullptr;
|
|
42
|
+
+
|
|
43
|
+
+ if (!variant.empty() && variant == "tool_use") {
|
|
44
|
+
+ tmpl = tmpls->template_tool_use.get();
|
|
45
|
+
+ } else {
|
|
46
|
+
+ tmpl = tmpls->template_default.get();
|
|
47
|
+
+ }
|
|
48
|
+
+
|
|
49
|
+
+ if (tmpl) {
|
|
50
|
+
+ auto caps = tmpl->original_caps();
|
|
51
|
+
+ result.supports_tools = caps.supports_tools;
|
|
52
|
+
+ result.supports_tool_calls = caps.supports_tool_calls;
|
|
53
|
+
+ result.supports_system_role = caps.supports_system_role;
|
|
54
|
+
+ result.supports_parallel_tool_calls = caps.supports_parallel_tool_calls;
|
|
55
|
+
+ }
|
|
56
|
+
+
|
|
57
|
+
+ return result;
|
|
58
|
+
+}
|
|
59
|
+
+
|
|
60
|
+
+bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant) {
|
|
61
|
+
+ if (variant.empty() || variant == "default") {
|
|
62
|
+
+ return tmpls->template_default != nullptr;
|
|
63
|
+
+ }
|
|
64
|
+
+ if (variant == "tool_use") {
|
|
65
|
+
+ return tmpls->template_tool_use != nullptr;
|
|
66
|
+
+ }
|
|
67
|
+
+ return false;
|
|
68
|
+
+}
|
|
69
|
+
+
|
|
70
|
+
common_chat_templates_ptr common_chat_templates_init(
|
|
71
|
+
const struct llama_model * model,
|
|
72
|
+
const std::string & chat_template_override,
|
|
73
|
+
@@ -831,8 +862,9 @@ static std::string apply(
|
|
74
|
+
if (inputs.add_generation_prompt) {
|
|
75
|
+
inp["add_generation_prompt"] = true;
|
|
76
|
+
}
|
|
77
|
+
- if (inp["tools"].is_null()) {
|
|
78
|
+
- inp["tools"] = json::array();
|
|
79
|
+
+ // Remove tools key when null, so templates can check "{% if tools is defined %}"
|
|
80
|
+
+ if (inp["tools"].is_null() || (inp["tools"].is_array() && inp["tools"].empty())) {
|
|
81
|
+
+ inp.erase("tools");
|
|
64
82
|
}
|
|
65
|
-
// TODO: add flag to control date/time, if only for testing purposes.
|
|
66
|
-
- // tmpl_inputs.now = std::chrono::system_clock::now();
|
|
67
|
-
+ tmpl_inputs.now = inputs.now;
|
|
68
83
|
|
|
69
|
-
|
|
70
|
-
// To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
|
|
84
|
+
jinja::global_from_json(ctx, inp, inputs.mark_input);
|
|
71
85
|
diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
|
|
72
|
-
index
|
|
86
|
+
index 148801738..0317f1ab1 100644
|
|
73
87
|
--- a/src/llama.cpp/common/chat.h
|
|
74
88
|
+++ b/src/llama.cpp/common/chat.h
|
|
75
|
-
@@ -
|
|
76
|
-
|
|
77
|
-
|
|
89
|
+
@@ -222,6 +222,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
|
|
90
|
+
|
|
91
|
+
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
|
|
78
92
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
93
|
+
+// Template capabilities structure (for exposing capabilities to external code)
|
|
94
|
+
+struct common_chat_template_caps {
|
|
95
|
+
+ bool supports_tools = true;
|
|
96
|
+
+ bool supports_tool_calls = true;
|
|
97
|
+
+ bool supports_system_role = true;
|
|
98
|
+
+ bool supports_parallel_tool_calls = true;
|
|
99
|
+
+};
|
|
82
100
|
+
|
|
83
|
-
|
|
101
|
+
+// Get template capabilities for a specific variant ("" for default, "tool_use" for tool_use template)
|
|
102
|
+
+common_chat_template_caps common_chat_templates_get_caps(const struct common_chat_templates * tmpls, const std::string & variant = "");
|
|
84
103
|
+
|
|
85
|
-
|
|
86
|
-
+
|
|
87
|
-
+
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
+};
|
|
92
|
-
|
|
93
|
-
struct common_chat_tool_call {
|
|
94
|
-
std::string name;
|
|
104
|
+
+// Check if a template variant exists
|
|
105
|
+
+bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant);
|
|
106
|
+
+
|
|
107
|
+
// Parses a JSON array of messages in OpenAI's chat completion API format.
|
|
108
|
+
// T can be std::string containing JSON or nlohmann::ordered_json
|
|
109
|
+
template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
|
|
95
110
|
diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
|
|
96
|
-
index
|
|
111
|
+
index 26250abb6..72ceddcc7 100644
|
|
97
112
|
--- a/src/llama.cpp/common/common.cpp
|
|
98
113
|
+++ b/src/llama.cpp/common/common.cpp
|
|
99
|
-
@@ -
|
|
114
|
+
@@ -1360,6 +1360,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
|
100
115
|
mparams.devices = params.devices.data();
|
|
101
116
|
}
|
|
102
117
|
|
|
@@ -105,10 +120,10 @@ index 744f0b4ee..04fcebb9e 100644
|
|
|
105
120
|
mparams.main_gpu = params.main_gpu;
|
|
106
121
|
mparams.split_mode = params.split_mode;
|
|
107
122
|
diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
|
|
108
|
-
index
|
|
123
|
+
index b9566df62..c9425ad2f 100644
|
|
109
124
|
--- a/src/llama.cpp/common/common.h
|
|
110
125
|
+++ b/src/llama.cpp/common/common.h
|
|
111
|
-
@@ -
|
|
126
|
+
@@ -314,6 +314,7 @@ struct lr_opt {
|
|
112
127
|
struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
|
|
113
128
|
|
|
114
129
|
struct common_params {
|
|
@@ -130,10 +145,10 @@ index 7622d0bf4..d2edcfddb 100644
|
|
|
130
145
|
check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
|
|
131
146
|
if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
|
|
132
147
|
diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
|
|
133
|
-
index
|
|
148
|
+
index 5b835c11c..681c00504 100644
|
|
134
149
|
--- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
|
|
135
150
|
+++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
|
|
136
|
-
@@ -
|
|
151
|
+
@@ -2819,9 +2819,24 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
|
|
137
152
|
GGML_UNUSED(dev);
|
|
138
153
|
}
|
|
139
154
|
|
|
@@ -160,7 +175,7 @@ index 365a24b49..83bf4ee62 100644
|
|
|
160
175
|
*total = *free;
|
|
161
176
|
|
|
162
177
|
GGML_UNUSED(dev);
|
|
163
|
-
@@ -
|
|
178
|
+
@@ -3056,10 +3071,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
|
|
164
179
|
}
|
|
165
180
|
}
|
|
166
181
|
|
|
@@ -182,7 +197,7 @@ index 365a24b49..83bf4ee62 100644
|
|
|
182
197
|
|
|
183
198
|
GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
|
|
184
199
|
|
|
185
|
-
@@ -
|
|
200
|
+
@@ -3072,6 +3094,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
|
|
186
201
|
} catch (const std::exception & exc) {
|
|
187
202
|
GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
|
|
188
203
|
devices[i].context = nullptr;
|
package/src/LlamaContext.cpp
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include "DecodeAudioTokenWorker.h"
|
|
11
11
|
#include "ggml.h"
|
|
12
12
|
#include "gguf.h"
|
|
13
|
+
#include "chat.h"
|
|
13
14
|
#include "json-schema-to-grammar.h"
|
|
14
15
|
#include <nlohmann/json.hpp>
|
|
15
16
|
#include "llama-impl.h"
|
|
@@ -600,40 +601,27 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
|
|
|
600
601
|
|
|
601
602
|
Napi::Object chatTemplates = Napi::Object::New(info.Env());
|
|
602
603
|
chatTemplates.Set("llamaChat", _rn_ctx->validateModelChatTemplate(false, nullptr));
|
|
603
|
-
Napi::Object
|
|
604
|
-
|
|
604
|
+
Napi::Object jinja = Napi::Object::New(info.Env());
|
|
605
|
+
jinja.Set("default", _rn_ctx->validateModelChatTemplate(true, nullptr));
|
|
605
606
|
Napi::Object defaultCaps = Napi::Object::New(info.Env());
|
|
606
|
-
auto
|
|
607
|
-
|
|
608
|
-
defaultCaps.Set(
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
defaultCaps.Set(
|
|
616
|
-
"systemRole",
|
|
617
|
-
default_tmpl_caps.supports_system_role);
|
|
618
|
-
defaultCaps.Set("parallelToolCalls", default_tmpl_caps.supports_parallel_tool_calls);
|
|
619
|
-
defaultCaps.Set("toolCallId", default_tmpl_caps.supports_tool_call_id);
|
|
620
|
-
minja.Set("defaultCaps", defaultCaps);
|
|
621
|
-
minja.Set("toolUse", _rn_ctx->validateModelChatTemplate(true, "tool_use"));
|
|
622
|
-
if (_rn_ctx->validateModelChatTemplate(true, "tool_use")) {
|
|
607
|
+
auto default_caps = common_chat_templates_get_caps(_rn_ctx->templates.get(), "");
|
|
608
|
+
defaultCaps.Set("tools", default_caps.supports_tools);
|
|
609
|
+
defaultCaps.Set("toolCalls", default_caps.supports_tool_calls);
|
|
610
|
+
defaultCaps.Set("systemRole", default_caps.supports_system_role);
|
|
611
|
+
defaultCaps.Set("parallelToolCalls", default_caps.supports_parallel_tool_calls);
|
|
612
|
+
jinja.Set("defaultCaps", defaultCaps);
|
|
613
|
+
bool hasToolUse = common_chat_templates_has_variant(_rn_ctx->templates.get(), "tool_use");
|
|
614
|
+
jinja.Set("toolUse", hasToolUse);
|
|
615
|
+
if (hasToolUse) {
|
|
623
616
|
Napi::Object toolUseCaps = Napi::Object::New(info.Env());
|
|
624
|
-
auto
|
|
625
|
-
|
|
626
|
-
toolUseCaps.Set(
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
toolUseCaps.Set("parallelToolCalls", tool_use_tmpl_caps.supports_parallel_tool_calls);
|
|
633
|
-
toolUseCaps.Set("toolCallId", tool_use_tmpl_caps.supports_tool_call_id);
|
|
634
|
-
minja.Set("toolUseCaps", toolUseCaps);
|
|
635
|
-
}
|
|
636
|
-
chatTemplates.Set("minja", minja);
|
|
617
|
+
auto tool_use_caps = common_chat_templates_get_caps(_rn_ctx->templates.get(), "tool_use");
|
|
618
|
+
toolUseCaps.Set("tools", tool_use_caps.supports_tools);
|
|
619
|
+
toolUseCaps.Set("toolCalls", tool_use_caps.supports_tool_calls);
|
|
620
|
+
toolUseCaps.Set("systemRole", tool_use_caps.supports_system_role);
|
|
621
|
+
toolUseCaps.Set("parallelToolCalls", tool_use_caps.supports_parallel_tool_calls);
|
|
622
|
+
jinja.Set("toolUseCaps", toolUseCaps);
|
|
623
|
+
}
|
|
624
|
+
chatTemplates.Set("jinja", jinja);
|
|
637
625
|
details.Set("chatTemplates", chatTemplates);
|
|
638
626
|
|
|
639
627
|
details.Set("metadata", metadata);
|
|
@@ -85,6 +85,18 @@ add_library(${TARGET} STATIC
|
|
|
85
85
|
speculative.h
|
|
86
86
|
unicode.cpp
|
|
87
87
|
unicode.h
|
|
88
|
+
jinja/lexer.cpp
|
|
89
|
+
jinja/lexer.h
|
|
90
|
+
jinja/parser.cpp
|
|
91
|
+
jinja/parser.h
|
|
92
|
+
jinja/runtime.cpp
|
|
93
|
+
jinja/runtime.h
|
|
94
|
+
jinja/value.cpp
|
|
95
|
+
jinja/value.h
|
|
96
|
+
jinja/string.cpp
|
|
97
|
+
jinja/string.h
|
|
98
|
+
jinja/caps.cpp
|
|
99
|
+
jinja/caps.h
|
|
88
100
|
)
|
|
89
101
|
|
|
90
102
|
target_include_directories(${TARGET} PUBLIC . ../vendor)
|
|
@@ -1729,6 +1729,26 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1729
1729
|
}
|
|
1730
1730
|
}
|
|
1731
1731
|
).set_sparam());
|
|
1732
|
+
add_opt(common_arg(
|
|
1733
|
+
{"--adaptive-target"}, "N",
|
|
1734
|
+
string_format("adaptive-p: select tokens near this probability (valid range 0.0 "
|
|
1735
|
+
"to 1.0; negative = disabled) (default: %.2f)\n"
|
|
1736
|
+
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)",
|
|
1737
|
+
(double)params.sampling.adaptive_target),
|
|
1738
|
+
[](common_params & params, const std::string & value) {
|
|
1739
|
+
params.sampling.adaptive_target = std::stof(value);
|
|
1740
|
+
}
|
|
1741
|
+
).set_sparam());
|
|
1742
|
+
add_opt(common_arg(
|
|
1743
|
+
{"--adaptive-decay"}, "N",
|
|
1744
|
+
string_format("adaptive-p: decay rate for target adaptation over time. lower values "
|
|
1745
|
+
"are more reactive, higher values are more stable.\n"
|
|
1746
|
+
"(valid range 0.0 to 0.99) (default: %.2f)",
|
|
1747
|
+
(double)params.sampling.adaptive_decay),
|
|
1748
|
+
[](common_params & params, const std::string & value) {
|
|
1749
|
+
params.sampling.adaptive_decay = std::stof(value);
|
|
1750
|
+
}
|
|
1751
|
+
).set_sparam());
|
|
1732
1752
|
add_opt(common_arg(
|
|
1733
1753
|
{"--dynatemp-range"}, "N",
|
|
1734
1754
|
string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sampling.dynatemp_range),
|