@fugood/llama.node 1.4.14 → 1.5.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +13 -6
- package/lib/index.js +2 -2
- package/lib/index.ts +8 -3
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +77 -65
- package/src/LlamaContext.cpp +31 -34
- package/src/llama.cpp/CMakeLists.txt +24 -8
- package/src/llama.cpp/common/CMakeLists.txt +15 -34
- package/src/llama.cpp/common/arg.cpp +59 -10
- package/src/llama.cpp/common/chat-parser.cpp +115 -0
- package/src/llama.cpp/common/chat.cpp +356 -34
- package/src/llama.cpp/common/chat.h +17 -13
- package/src/llama.cpp/common/common.cpp +0 -1
- package/src/llama.cpp/common/common.h +30 -25
- package/src/llama.cpp/common/debug.cpp +165 -0
- package/src/llama.cpp/common/debug.h +43 -0
- package/src/llama.cpp/common/download.cpp +12 -342
- package/src/llama.cpp/common/download.h +6 -0
- package/src/llama.cpp/common/jinja/caps.cpp +237 -0
- package/src/llama.cpp/common/jinja/caps.h +24 -0
- package/src/llama.cpp/common/jinja/lexer.cpp +341 -0
- package/src/llama.cpp/common/jinja/lexer.h +157 -0
- package/src/llama.cpp/common/jinja/parser.cpp +591 -0
- package/src/llama.cpp/common/jinja/parser.h +21 -0
- package/src/llama.cpp/common/jinja/runtime.cpp +865 -0
- package/src/llama.cpp/common/jinja/runtime.h +628 -0
- package/src/llama.cpp/common/jinja/string.cpp +207 -0
- package/src/llama.cpp/common/jinja/string.h +58 -0
- package/src/llama.cpp/common/jinja/utils.h +49 -0
- package/src/llama.cpp/common/jinja/value.cpp +1221 -0
- package/src/llama.cpp/common/jinja/value.h +464 -0
- package/src/llama.cpp/common/preset.cpp +12 -2
- package/src/llama.cpp/common/sampling.cpp +52 -19
- package/src/llama.cpp/ggml/include/ggml.h +39 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -37
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +31 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +18 -0
- package/src/llama.cpp/include/llama-cpp.h +3 -1
- package/src/llama.cpp/include/llama.h +29 -2
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-adapter.cpp +7 -13
- package/src/llama.cpp/src/llama-adapter.h +1 -3
- package/src/llama.cpp/src/llama-arch.cpp +35 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +20 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +232 -144
- package/src/llama.cpp/src/llama-context.h +10 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-graph.cpp +31 -43
- package/src/llama.cpp/src/llama-hparams.cpp +0 -36
- package/src/llama.cpp/src/llama-hparams.h +38 -1
- package/src/llama.cpp/src/llama-kv-cache.cpp +201 -59
- package/src/llama.cpp/src/llama-kv-cache.h +0 -2
- package/src/llama.cpp/src/llama-mmap.cpp +13 -6
- package/src/llama.cpp/src/llama-model-loader.cpp +21 -7
- package/src/llama.cpp/src/llama-model.cpp +215 -97
- package/src/llama.cpp/src/llama-model.h +3 -2
- package/src/llama.cpp/src/llama-sampling.cpp +170 -13
- package/src/llama.cpp/src/llama-vocab.cpp +37 -24
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
- package/src/llama.cpp/src/models/models.h +13 -2
- package/src/llama.cpp/src/models/qwen3next.cpp +198 -182
package/lib/binding.ts
CHANGED
|
@@ -374,24 +374,20 @@ export type ModelInfo = {
|
|
|
374
374
|
is_hybrid: boolean
|
|
375
375
|
chatTemplates: {
|
|
376
376
|
llamaChat: boolean
|
|
377
|
-
|
|
377
|
+
jinja: {
|
|
378
378
|
default: boolean
|
|
379
379
|
defaultCaps: {
|
|
380
380
|
tools: boolean
|
|
381
381
|
toolCalls: boolean
|
|
382
|
-
toolResponses: boolean
|
|
383
382
|
systemRole: boolean
|
|
384
383
|
parallelToolCalls: boolean
|
|
385
|
-
toolCallId: boolean
|
|
386
384
|
}
|
|
387
385
|
toolUse: boolean
|
|
388
386
|
toolUseCaps?: {
|
|
389
387
|
tools: boolean
|
|
390
388
|
toolCalls: boolean
|
|
391
|
-
toolResponses: boolean
|
|
392
389
|
systemRole: boolean
|
|
393
390
|
parallelToolCalls: boolean
|
|
394
|
-
toolCallId: boolean
|
|
395
391
|
}
|
|
396
392
|
}
|
|
397
393
|
}
|
|
@@ -515,9 +511,20 @@ export interface LlamaContext {
|
|
|
515
511
|
/**
|
|
516
512
|
* Initialize multimodal support with a mmproj file
|
|
517
513
|
* @param options Object containing path and optional use_gpu flag
|
|
514
|
+
* @param options.path Path to the multimodal projector model file (mmproj)
|
|
515
|
+
* @param options.use_gpu Whether to use GPU for multimodal processing (default: true)
|
|
516
|
+
* @param options.image_min_tokens Minimum number of tokens for image input (for dynamic resolution models)
|
|
517
|
+
* @param options.image_max_tokens Maximum number of tokens for image input (for dynamic resolution models).
|
|
518
|
+
* Lower values reduce memory usage and improve speed for high-resolution images.
|
|
519
|
+
* Recommended: 256-512 for faster inference, up to 4096 for maximum detail.
|
|
518
520
|
* @returns boolean indicating if initialization was successful
|
|
519
521
|
*/
|
|
520
|
-
initMultimodal(options: {
|
|
522
|
+
initMultimodal(options: {
|
|
523
|
+
path: string
|
|
524
|
+
use_gpu?: boolean
|
|
525
|
+
image_min_tokens?: number
|
|
526
|
+
image_max_tokens?: number
|
|
527
|
+
}): boolean
|
|
521
528
|
|
|
522
529
|
/**
|
|
523
530
|
* Check if multimodal support is enabled
|
package/lib/index.js
CHANGED
|
@@ -80,8 +80,8 @@ class LlamaContextWrapper {
|
|
|
80
80
|
return this.ctx.getUsedDevices();
|
|
81
81
|
}
|
|
82
82
|
isJinjaSupported() {
|
|
83
|
-
const {
|
|
84
|
-
return !!(
|
|
83
|
+
const { jinja } = this.ctx.getModelInfo().chatTemplates;
|
|
84
|
+
return !!(jinja === null || jinja === void 0 ? void 0 : jinja.toolUse) || !!(jinja === null || jinja === void 0 ? void 0 : jinja.default);
|
|
85
85
|
}
|
|
86
86
|
isLlamaChatSupported() {
|
|
87
87
|
return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
|
package/lib/index.ts
CHANGED
|
@@ -100,8 +100,8 @@ class LlamaContextWrapper {
|
|
|
100
100
|
}
|
|
101
101
|
|
|
102
102
|
isJinjaSupported(): boolean {
|
|
103
|
-
const {
|
|
104
|
-
return !!
|
|
103
|
+
const { jinja } = this.ctx.getModelInfo().chatTemplates
|
|
104
|
+
return !!jinja?.toolUse || !!jinja?.default
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
isLlamaChatSupported(): boolean {
|
|
@@ -254,7 +254,12 @@ class LlamaContextWrapper {
|
|
|
254
254
|
return this.ctx.getLoadedLoraAdapters()
|
|
255
255
|
}
|
|
256
256
|
|
|
257
|
-
initMultimodal(options: {
|
|
257
|
+
initMultimodal(options: {
|
|
258
|
+
path: string
|
|
259
|
+
use_gpu?: boolean
|
|
260
|
+
image_min_tokens?: number
|
|
261
|
+
image_max_tokens?: number
|
|
262
|
+
}): boolean {
|
|
258
263
|
return this.ctx.initMultimodal(options)
|
|
259
264
|
}
|
|
260
265
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/llama.node",
|
|
3
3
|
"access": "public",
|
|
4
|
-
"version": "1.
|
|
4
|
+
"version": "1.5.0-rc.0",
|
|
5
5
|
"description": "An another Node binding of llama.cpp",
|
|
6
6
|
"main": "lib/index.js",
|
|
7
7
|
"scripts": {
|
|
@@ -72,20 +72,20 @@
|
|
|
72
72
|
"CMakeLists.txt"
|
|
73
73
|
],
|
|
74
74
|
"optionalDependencies": {
|
|
75
|
-
"@fugood/node-llama-darwin-arm64": "1.
|
|
76
|
-
"@fugood/node-llama-darwin-x64": "1.
|
|
77
|
-
"@fugood/node-llama-linux-arm64": "1.
|
|
78
|
-
"@fugood/node-llama-linux-arm64-cuda": "1.
|
|
79
|
-
"@fugood/node-llama-linux-arm64-snapdragon": "1.
|
|
80
|
-
"@fugood/node-llama-linux-arm64-vulkan": "1.
|
|
81
|
-
"@fugood/node-llama-linux-x64": "1.
|
|
82
|
-
"@fugood/node-llama-linux-x64-cuda": "1.
|
|
83
|
-
"@fugood/node-llama-linux-x64-vulkan": "1.
|
|
84
|
-
"@fugood/node-llama-win32-arm64": "1.
|
|
85
|
-
"@fugood/node-llama-win32-arm64-vulkan": "1.
|
|
86
|
-
"@fugood/node-llama-win32-x64": "1.
|
|
87
|
-
"@fugood/node-llama-win32-x64-cuda": "1.
|
|
88
|
-
"@fugood/node-llama-win32-x64-vulkan": "1.
|
|
75
|
+
"@fugood/node-llama-darwin-arm64": "1.5.0-rc.0",
|
|
76
|
+
"@fugood/node-llama-darwin-x64": "1.5.0-rc.0",
|
|
77
|
+
"@fugood/node-llama-linux-arm64": "1.5.0-rc.0",
|
|
78
|
+
"@fugood/node-llama-linux-arm64-cuda": "1.5.0-rc.0",
|
|
79
|
+
"@fugood/node-llama-linux-arm64-snapdragon": "1.5.0-rc.0",
|
|
80
|
+
"@fugood/node-llama-linux-arm64-vulkan": "1.5.0-rc.0",
|
|
81
|
+
"@fugood/node-llama-linux-x64": "1.5.0-rc.0",
|
|
82
|
+
"@fugood/node-llama-linux-x64-cuda": "1.5.0-rc.0",
|
|
83
|
+
"@fugood/node-llama-linux-x64-vulkan": "1.5.0-rc.0",
|
|
84
|
+
"@fugood/node-llama-win32-arm64": "1.5.0-rc.0",
|
|
85
|
+
"@fugood/node-llama-win32-arm64-vulkan": "1.5.0-rc.0",
|
|
86
|
+
"@fugood/node-llama-win32-x64": "1.5.0-rc.0",
|
|
87
|
+
"@fugood/node-llama-win32-x64-cuda": "1.5.0-rc.0",
|
|
88
|
+
"@fugood/node-llama-win32-x64-vulkan": "1.5.0-rc.0"
|
|
89
89
|
},
|
|
90
90
|
"devDependencies": {
|
|
91
91
|
"@babel/preset-env": "^7.24.4",
|
package/scripts/llama.cpp.patch
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
|
|
2
|
-
index
|
|
2
|
+
index ae02c0bd7..f74d8bb26 100644
|
|
3
3
|
--- a/src/llama.cpp/common/CMakeLists.txt
|
|
4
4
|
+++ b/src/llama.cpp/common/CMakeLists.txt
|
|
5
|
-
@@ -
|
|
5
|
+
@@ -158,4 +158,11 @@ if (LLAMA_LLGUIDANCE)
|
|
6
6
|
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
|
|
7
7
|
endif ()
|
|
8
8
|
|
|
@@ -13,11 +13,8 @@ index f7b99159e..fa37fed19 100644
|
|
|
13
13
|
+else()
|
|
14
14
|
+ set(LLAMA_COMMON_WIN_LIBS "")
|
|
15
15
|
+endif()
|
|
16
|
-
|
|
16
|
+
+
|
|
17
17
|
+target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
|
|
18
|
-
|
|
19
|
-
#
|
|
20
|
-
# copy the license files
|
|
21
18
|
diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
|
|
22
19
|
index 1bcba9cd8..b7cd68734 100644
|
|
23
20
|
--- a/src/llama.cpp/common/chat-peg-parser.cpp
|
|
@@ -32,74 +29,89 @@ index 1bcba9cd8..b7cd68734 100644
|
|
|
32
29
|
static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
|
|
33
30
|
int count = 0;
|
|
34
31
|
diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
|
|
35
|
-
index
|
|
32
|
+
index b29544dac..5fa2c6c17 100644
|
|
36
33
|
--- a/src/llama.cpp/common/chat.cpp
|
|
37
34
|
+++ b/src/llama.cpp/common/chat.cpp
|
|
38
|
-
@@ -
|
|
39
|
-
|
|
40
|
-
#include "regex-partial.h"
|
|
41
|
-
|
|
42
|
-
-#include <minja/chat-template.hpp>
|
|
43
|
-
-#include <minja/minja.hpp>
|
|
44
|
-
-
|
|
45
|
-
#include <algorithm>
|
|
46
|
-
#include <cstdio>
|
|
47
|
-
#include <cctype>
|
|
48
|
-
@@ -135,16 +132,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
|
|
49
|
-
return diffs;
|
|
35
|
+
@@ -615,6 +615,37 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
|
|
36
|
+
return tmpls->template_default->source();
|
|
50
37
|
}
|
|
51
38
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
39
|
+
+common_chat_template_caps common_chat_templates_get_caps(const struct common_chat_templates * tmpls, const std::string & variant) {
|
|
40
|
+
+ common_chat_template_caps result;
|
|
41
|
+
+ const common_chat_template * tmpl = nullptr;
|
|
42
|
+
+
|
|
43
|
+
+ if (!variant.empty() && variant == "tool_use") {
|
|
44
|
+
+ tmpl = tmpls->template_tool_use.get();
|
|
45
|
+
+ } else {
|
|
46
|
+
+ tmpl = tmpls->template_default.get();
|
|
47
|
+
+ }
|
|
48
|
+
+
|
|
49
|
+
+ if (tmpl) {
|
|
50
|
+
+ auto caps = tmpl->original_caps();
|
|
51
|
+
+ result.supports_tools = caps.supports_tools;
|
|
52
|
+
+ result.supports_tool_calls = caps.supports_tool_calls;
|
|
53
|
+
+ result.supports_system_role = caps.supports_system_role;
|
|
54
|
+
+ result.supports_parallel_tool_calls = caps.supports_parallel_tool_calls;
|
|
55
|
+
+ }
|
|
56
|
+
+
|
|
57
|
+
+ return result;
|
|
58
|
+
+}
|
|
59
|
+
+
|
|
60
|
+
+bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant) {
|
|
61
|
+
+ if (variant.empty() || variant == "default") {
|
|
62
|
+
+ return tmpls->template_default != nullptr;
|
|
63
|
+
+ }
|
|
64
|
+
+ if (variant == "tool_use") {
|
|
65
|
+
+ return tmpls->template_tool_use != nullptr;
|
|
66
|
+
+ }
|
|
67
|
+
+ return false;
|
|
68
|
+
+}
|
|
69
|
+
+
|
|
70
|
+
common_chat_templates_ptr common_chat_templates_init(
|
|
71
|
+
const struct llama_model * model,
|
|
72
|
+
const std::string & chat_template_override,
|
|
73
|
+
@@ -831,8 +862,9 @@ static std::string apply(
|
|
74
|
+
if (inputs.add_generation_prompt) {
|
|
75
|
+
inp["add_generation_prompt"] = true;
|
|
76
|
+
}
|
|
77
|
+
- if (inp["tools"].is_null()) {
|
|
78
|
+
- inp["tools"] = json::array();
|
|
79
|
+
+ // Remove tools key when null, so templates can check "{% if tools is defined %}"
|
|
80
|
+
+ if (inp["tools"].is_null() || (inp["tools"].is_array() && inp["tools"].empty())) {
|
|
81
|
+
+ inp.erase("tools");
|
|
67
82
|
}
|
|
68
|
-
// TODO: add flag to control date/time, if only for testing purposes.
|
|
69
|
-
- // tmpl_inputs.now = std::chrono::system_clock::now();
|
|
70
|
-
+ tmpl_inputs.now = inputs.now;
|
|
71
83
|
|
|
72
|
-
|
|
73
|
-
// To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
|
|
84
|
+
jinja::global_from_json(ctx, inp, inputs.mark_input);
|
|
74
85
|
diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
|
|
75
|
-
index
|
|
86
|
+
index 148801738..0317f1ab1 100644
|
|
76
87
|
--- a/src/llama.cpp/common/chat.h
|
|
77
88
|
+++ b/src/llama.cpp/common/chat.h
|
|
78
|
-
@@ -
|
|
79
|
-
|
|
80
|
-
|
|
89
|
+
@@ -222,6 +222,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
|
|
90
|
+
|
|
91
|
+
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
|
|
81
92
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
93
|
+
+// Template capabilities structure (for exposing capabilities to external code)
|
|
94
|
+
+struct common_chat_template_caps {
|
|
95
|
+
+ bool supports_tools = true;
|
|
96
|
+
+ bool supports_tool_calls = true;
|
|
97
|
+
+ bool supports_system_role = true;
|
|
98
|
+
+ bool supports_parallel_tool_calls = true;
|
|
99
|
+
+};
|
|
85
100
|
+
|
|
86
|
-
|
|
101
|
+
+// Get template capabilities for a specific variant ("" for default, "tool_use" for tool_use template)
|
|
102
|
+
+common_chat_template_caps common_chat_templates_get_caps(const struct common_chat_templates * tmpls, const std::string & variant = "");
|
|
87
103
|
+
|
|
88
|
-
|
|
89
|
-
+
|
|
90
|
-
+
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
+};
|
|
95
|
-
|
|
96
|
-
struct common_chat_tool_call {
|
|
97
|
-
std::string name;
|
|
104
|
+
+// Check if a template variant exists
|
|
105
|
+
+bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant);
|
|
106
|
+
+
|
|
107
|
+
// Parses a JSON array of messages in OpenAI's chat completion API format.
|
|
108
|
+
// T can be std::string containing JSON or nlohmann::ordered_json
|
|
109
|
+
template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
|
|
98
110
|
diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
|
|
99
|
-
index
|
|
111
|
+
index 26250abb6..72ceddcc7 100644
|
|
100
112
|
--- a/src/llama.cpp/common/common.cpp
|
|
101
113
|
+++ b/src/llama.cpp/common/common.cpp
|
|
102
|
-
@@ -
|
|
114
|
+
@@ -1360,6 +1360,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
|
103
115
|
mparams.devices = params.devices.data();
|
|
104
116
|
}
|
|
105
117
|
|
|
@@ -108,10 +120,10 @@ index 744f0b4ee..04fcebb9e 100644
|
|
|
108
120
|
mparams.main_gpu = params.main_gpu;
|
|
109
121
|
mparams.split_mode = params.split_mode;
|
|
110
122
|
diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
|
|
111
|
-
index
|
|
123
|
+
index b9566df62..c9425ad2f 100644
|
|
112
124
|
--- a/src/llama.cpp/common/common.h
|
|
113
125
|
+++ b/src/llama.cpp/common/common.h
|
|
114
|
-
@@ -
|
|
126
|
+
@@ -314,6 +314,7 @@ struct lr_opt {
|
|
115
127
|
struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
|
|
116
128
|
|
|
117
129
|
struct common_params {
|
|
@@ -133,10 +145,10 @@ index 7622d0bf4..d2edcfddb 100644
|
|
|
133
145
|
check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
|
|
134
146
|
if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
|
|
135
147
|
diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
|
|
136
|
-
index
|
|
148
|
+
index 5b835c11c..681c00504 100644
|
|
137
149
|
--- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
|
|
138
150
|
+++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
|
|
139
|
-
@@ -
|
|
151
|
+
@@ -2819,9 +2819,24 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
|
|
140
152
|
GGML_UNUSED(dev);
|
|
141
153
|
}
|
|
142
154
|
|
|
@@ -163,7 +175,7 @@ index 365a24b49..83bf4ee62 100644
|
|
|
163
175
|
*total = *free;
|
|
164
176
|
|
|
165
177
|
GGML_UNUSED(dev);
|
|
166
|
-
@@ -
|
|
178
|
+
@@ -3056,10 +3071,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
|
|
167
179
|
}
|
|
168
180
|
}
|
|
169
181
|
|
|
@@ -185,7 +197,7 @@ index 365a24b49..83bf4ee62 100644
|
|
|
185
197
|
|
|
186
198
|
GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
|
|
187
199
|
|
|
188
|
-
@@ -
|
|
200
|
+
@@ -3072,6 +3094,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
|
|
189
201
|
} catch (const std::exception & exc) {
|
|
190
202
|
GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
|
|
191
203
|
devices[i].context = nullptr;
|
package/src/LlamaContext.cpp
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include "DecodeAudioTokenWorker.h"
|
|
11
11
|
#include "ggml.h"
|
|
12
12
|
#include "gguf.h"
|
|
13
|
+
#include "chat.h"
|
|
13
14
|
#include "json-schema-to-grammar.h"
|
|
14
15
|
#include <nlohmann/json.hpp>
|
|
15
16
|
#include "llama-impl.h"
|
|
@@ -600,40 +601,27 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
|
|
|
600
601
|
|
|
601
602
|
Napi::Object chatTemplates = Napi::Object::New(info.Env());
|
|
602
603
|
chatTemplates.Set("llamaChat", _rn_ctx->validateModelChatTemplate(false, nullptr));
|
|
603
|
-
Napi::Object
|
|
604
|
-
|
|
604
|
+
Napi::Object jinja = Napi::Object::New(info.Env());
|
|
605
|
+
jinja.Set("default", _rn_ctx->validateModelChatTemplate(true, nullptr));
|
|
605
606
|
Napi::Object defaultCaps = Napi::Object::New(info.Env());
|
|
606
|
-
auto
|
|
607
|
-
|
|
608
|
-
defaultCaps.Set(
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
defaultCaps.Set(
|
|
616
|
-
"systemRole",
|
|
617
|
-
default_tmpl_caps.supports_system_role);
|
|
618
|
-
defaultCaps.Set("parallelToolCalls", default_tmpl_caps.supports_parallel_tool_calls);
|
|
619
|
-
defaultCaps.Set("toolCallId", default_tmpl_caps.supports_tool_call_id);
|
|
620
|
-
minja.Set("defaultCaps", defaultCaps);
|
|
621
|
-
minja.Set("toolUse", _rn_ctx->validateModelChatTemplate(true, "tool_use"));
|
|
622
|
-
if (_rn_ctx->validateModelChatTemplate(true, "tool_use")) {
|
|
607
|
+
auto default_caps = common_chat_templates_get_caps(_rn_ctx->templates.get(), "");
|
|
608
|
+
defaultCaps.Set("tools", default_caps.supports_tools);
|
|
609
|
+
defaultCaps.Set("toolCalls", default_caps.supports_tool_calls);
|
|
610
|
+
defaultCaps.Set("systemRole", default_caps.supports_system_role);
|
|
611
|
+
defaultCaps.Set("parallelToolCalls", default_caps.supports_parallel_tool_calls);
|
|
612
|
+
jinja.Set("defaultCaps", defaultCaps);
|
|
613
|
+
bool hasToolUse = common_chat_templates_has_variant(_rn_ctx->templates.get(), "tool_use");
|
|
614
|
+
jinja.Set("toolUse", hasToolUse);
|
|
615
|
+
if (hasToolUse) {
|
|
623
616
|
Napi::Object toolUseCaps = Napi::Object::New(info.Env());
|
|
624
|
-
auto
|
|
625
|
-
|
|
626
|
-
toolUseCaps.Set(
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
toolUseCaps.Set("parallelToolCalls", tool_use_tmpl_caps.supports_parallel_tool_calls);
|
|
633
|
-
toolUseCaps.Set("toolCallId", tool_use_tmpl_caps.supports_tool_call_id);
|
|
634
|
-
minja.Set("toolUseCaps", toolUseCaps);
|
|
635
|
-
}
|
|
636
|
-
chatTemplates.Set("minja", minja);
|
|
617
|
+
auto tool_use_caps = common_chat_templates_get_caps(_rn_ctx->templates.get(), "tool_use");
|
|
618
|
+
toolUseCaps.Set("tools", tool_use_caps.supports_tools);
|
|
619
|
+
toolUseCaps.Set("toolCalls", tool_use_caps.supports_tool_calls);
|
|
620
|
+
toolUseCaps.Set("systemRole", tool_use_caps.supports_system_role);
|
|
621
|
+
toolUseCaps.Set("parallelToolCalls", tool_use_caps.supports_parallel_tool_calls);
|
|
622
|
+
jinja.Set("toolUseCaps", toolUseCaps);
|
|
623
|
+
}
|
|
624
|
+
chatTemplates.Set("jinja", jinja);
|
|
637
625
|
details.Set("chatTemplates", chatTemplates);
|
|
638
626
|
|
|
639
627
|
details.Set("metadata", metadata);
|
|
@@ -1333,7 +1321,7 @@ extern "C" void cleanup_logging() {
|
|
|
1333
1321
|
}
|
|
1334
1322
|
|
|
1335
1323
|
|
|
1336
|
-
// initMultimodal(options: { path: string, use_gpu?: boolean }): boolean
|
|
1324
|
+
// initMultimodal(options: { path: string, use_gpu?: boolean, image_min_tokens?: number, image_max_tokens?: number }): boolean
|
|
1337
1325
|
Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
|
|
1338
1326
|
Napi::Env env = info.Env();
|
|
1339
1327
|
|
|
@@ -1345,6 +1333,15 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
|
|
|
1345
1333
|
auto options = info[0].As<Napi::Object>();
|
|
1346
1334
|
auto mmproj_path = options.Get("path").ToString().Utf8Value();
|
|
1347
1335
|
auto use_gpu = options.Get("use_gpu").ToBoolean().Value();
|
|
1336
|
+
int image_min_tokens = -1;
|
|
1337
|
+
int image_max_tokens = -1;
|
|
1338
|
+
|
|
1339
|
+
if (options.Has("image_min_tokens") && options.Get("image_min_tokens").IsNumber()) {
|
|
1340
|
+
image_min_tokens = options.Get("image_min_tokens").ToNumber().Int32Value();
|
|
1341
|
+
}
|
|
1342
|
+
if (options.Has("image_max_tokens") && options.Get("image_max_tokens").IsNumber()) {
|
|
1343
|
+
image_max_tokens = options.Get("image_max_tokens").ToNumber().Int32Value();
|
|
1344
|
+
}
|
|
1348
1345
|
|
|
1349
1346
|
if (mmproj_path.empty()) {
|
|
1350
1347
|
Napi::TypeError::New(env, "mmproj path is required")
|
|
@@ -1360,7 +1357,7 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
|
|
|
1360
1357
|
|
|
1361
1358
|
// Disable ctx_shift before initializing multimodal
|
|
1362
1359
|
_rn_ctx->params.ctx_shift = false;
|
|
1363
|
-
bool result = _rn_ctx->initMultimodal(mmproj_path, use_gpu);
|
|
1360
|
+
bool result = _rn_ctx->initMultimodal(mmproj_path, use_gpu, image_min_tokens, image_max_tokens);
|
|
1364
1361
|
if (!result) {
|
|
1365
1362
|
Napi::Error::New(env, "Failed to initialize multimodal context")
|
|
1366
1363
|
.ThrowAsJavaScriptException();
|
|
@@ -111,11 +111,16 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
|
|
|
111
111
|
option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
|
|
112
112
|
|
|
113
113
|
# 3rd party libs
|
|
114
|
-
option(
|
|
115
|
-
option(
|
|
116
|
-
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
|
|
114
|
+
option(LLAMA_HTTPLIB "llama: httplib for downloading functionality" ON)
|
|
115
|
+
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" ON)
|
|
117
116
|
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
|
|
118
117
|
|
|
118
|
+
# deprecated
|
|
119
|
+
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
|
|
120
|
+
if (LLAMA_CURL)
|
|
121
|
+
message(WARNING "LLAMA_CURL option is deprecated and will be ignored")
|
|
122
|
+
endif()
|
|
123
|
+
|
|
119
124
|
# Required for relocatable CMake package
|
|
120
125
|
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
|
121
126
|
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
|
|
@@ -182,6 +187,9 @@ if (NOT MSVC)
|
|
|
182
187
|
endif()
|
|
183
188
|
endif()
|
|
184
189
|
|
|
190
|
+
include("cmake/license.cmake")
|
|
191
|
+
license_add_file("llama.cpp" "LICENSE")
|
|
192
|
+
|
|
185
193
|
#
|
|
186
194
|
# 3rd-party
|
|
187
195
|
#
|
|
@@ -209,11 +217,6 @@ add_subdirectory(src)
|
|
|
209
217
|
# utils, programs, examples and tests
|
|
210
218
|
#
|
|
211
219
|
|
|
212
|
-
if (NOT LLAMA_BUILD_COMMON)
|
|
213
|
-
message(STATUS "LLAMA_BUILD_COMMON is OFF, disabling LLAMA_CURL")
|
|
214
|
-
set(LLAMA_CURL OFF)
|
|
215
|
-
endif()
|
|
216
|
-
|
|
217
220
|
if (LLAMA_BUILD_COMMON)
|
|
218
221
|
add_subdirectory(common)
|
|
219
222
|
if (LLAMA_HTTPLIB)
|
|
@@ -235,6 +238,19 @@ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TOOLS)
|
|
|
235
238
|
add_subdirectory(tools)
|
|
236
239
|
endif()
|
|
237
240
|
|
|
241
|
+
# Automatically add all files from the 'licenses' directory
|
|
242
|
+
file(GLOB EXTRA_LICENSES "${CMAKE_SOURCE_DIR}/licenses/LICENSE-*")
|
|
243
|
+
|
|
244
|
+
foreach(FILE_PATH ${EXTRA_LICENSES})
|
|
245
|
+
get_filename_component(FILE_NAME "${FILE_PATH}" NAME)
|
|
246
|
+
string(REGEX REPLACE "^LICENSE-" "" NAME "${FILE_NAME}")
|
|
247
|
+
license_add_file("${NAME}" "${FILE_PATH}")
|
|
248
|
+
endforeach()
|
|
249
|
+
|
|
250
|
+
if (LLAMA_BUILD_COMMON)
|
|
251
|
+
license_generate(common)
|
|
252
|
+
endif()
|
|
253
|
+
|
|
238
254
|
#
|
|
239
255
|
# install
|
|
240
256
|
#
|
|
@@ -60,6 +60,8 @@ add_library(${TARGET} STATIC
|
|
|
60
60
|
common.h
|
|
61
61
|
console.cpp
|
|
62
62
|
console.h
|
|
63
|
+
debug.cpp
|
|
64
|
+
debug.h
|
|
63
65
|
download.cpp
|
|
64
66
|
download.h
|
|
65
67
|
http.h
|
|
@@ -83,6 +85,18 @@ add_library(${TARGET} STATIC
|
|
|
83
85
|
speculative.h
|
|
84
86
|
unicode.cpp
|
|
85
87
|
unicode.h
|
|
88
|
+
jinja/lexer.cpp
|
|
89
|
+
jinja/lexer.h
|
|
90
|
+
jinja/parser.cpp
|
|
91
|
+
jinja/parser.h
|
|
92
|
+
jinja/runtime.cpp
|
|
93
|
+
jinja/runtime.h
|
|
94
|
+
jinja/value.cpp
|
|
95
|
+
jinja/value.h
|
|
96
|
+
jinja/string.cpp
|
|
97
|
+
jinja/string.h
|
|
98
|
+
jinja/caps.cpp
|
|
99
|
+
jinja/caps.h
|
|
86
100
|
)
|
|
87
101
|
|
|
88
102
|
target_include_directories(${TARGET} PUBLIC . ../vendor)
|
|
@@ -95,17 +109,7 @@ endif()
|
|
|
95
109
|
# TODO: use list(APPEND LLAMA_COMMON_EXTRA_LIBS ...)
|
|
96
110
|
set(LLAMA_COMMON_EXTRA_LIBS build_info)
|
|
97
111
|
|
|
98
|
-
if (
|
|
99
|
-
# Use curl to download model url
|
|
100
|
-
find_package(CURL)
|
|
101
|
-
if (NOT CURL_FOUND)
|
|
102
|
-
message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
|
|
103
|
-
endif()
|
|
104
|
-
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
|
|
105
|
-
include_directories(${CURL_INCLUDE_DIRS})
|
|
106
|
-
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
|
|
107
|
-
elseif (LLAMA_HTTPLIB)
|
|
108
|
-
# otherwise, use cpp-httplib
|
|
112
|
+
if (LLAMA_HTTPLIB)
|
|
109
113
|
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
|
|
110
114
|
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
|
|
111
115
|
endif()
|
|
@@ -162,26 +166,3 @@ else()
|
|
|
162
166
|
endif()
|
|
163
167
|
|
|
164
168
|
target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
|
|
165
|
-
|
|
166
|
-
#
|
|
167
|
-
# copy the license files
|
|
168
|
-
#
|
|
169
|
-
|
|
170
|
-
# Check if running in GitHub Actions
|
|
171
|
-
if (DEFINED ENV{GITHUB_ACTIONS} AND "$ENV{GITHUB_ACTIONS}" STREQUAL "true")
|
|
172
|
-
message(STATUS "Running inside GitHub Actions - copying license files")
|
|
173
|
-
|
|
174
|
-
# Copy all files from licenses/ to build/bin/
|
|
175
|
-
file(GLOB LICENSE_FILES "${CMAKE_SOURCE_DIR}/licenses/*")
|
|
176
|
-
foreach(LICENSE_FILE ${LICENSE_FILES})
|
|
177
|
-
get_filename_component(FILENAME ${LICENSE_FILE} NAME)
|
|
178
|
-
add_custom_command(
|
|
179
|
-
POST_BUILD
|
|
180
|
-
TARGET ${TARGET}
|
|
181
|
-
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
|
182
|
-
"${LICENSE_FILE}"
|
|
183
|
-
"$<TARGET_FILE_DIR:llama>/${FILENAME}"
|
|
184
|
-
COMMENT "Copying ${FILENAME} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
|
|
185
|
-
message(STATUS "Copying ${LICENSE_FILE} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${FILENAME}")
|
|
186
|
-
endforeach()
|
|
187
|
-
endif()
|