@fugood/llama.node 1.4.13 → 1.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +23 -2
- package/lib/index.js +2 -1
- package/lib/index.ts +8 -1
- package/lib/parallel.ts +2 -2
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +9 -12
- package/src/LlamaContext.cpp +16 -4
- package/src/llama.cpp/CMakeLists.txt +24 -8
- package/src/llama.cpp/common/CMakeLists.txt +3 -34
- package/src/llama.cpp/common/arg.cpp +183 -60
- package/src/llama.cpp/common/arg.h +0 -8
- package/src/llama.cpp/common/chat-parser.cpp +115 -0
- package/src/llama.cpp/common/chat.cpp +67 -0
- package/src/llama.cpp/common/chat.h +1 -0
- package/src/llama.cpp/common/common.cpp +2 -1
- package/src/llama.cpp/common/common.h +12 -7
- package/src/llama.cpp/common/debug.cpp +165 -0
- package/src/llama.cpp/common/debug.h +43 -0
- package/src/llama.cpp/common/download.cpp +88 -369
- package/src/llama.cpp/common/download.h +32 -5
- package/src/llama.cpp/common/preset.cpp +87 -2
- package/src/llama.cpp/common/preset.h +10 -1
- package/src/llama.cpp/ggml/include/ggml.h +5 -0
- package/src/llama.cpp/include/llama.h +5 -2
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +35 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +20 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +31 -43
- package/src/llama.cpp/src/llama-mmap.cpp +78 -42
- package/src/llama.cpp/src/llama-mmap.h +5 -4
- package/src/llama.cpp/src/llama-model-loader.cpp +17 -5
- package/src/llama.cpp/src/llama-model-loader.h +2 -0
- package/src/llama.cpp/src/llama-model.cpp +225 -101
- package/src/llama.cpp/src/llama-quant.cpp +1 -1
- package/src/llama.cpp/src/llama-sampling.cpp +1 -1
- package/src/llama.cpp/src/llama-vocab.cpp +37 -24
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/llama.cpp +63 -27
- package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
- package/src/llama.cpp/src/models/models.h +13 -2
- package/src/llama.cpp/src/models/qwen3next.cpp +198 -182
package/lib/binding.ts
CHANGED
@@ -112,7 +112,7 @@ export type CompletionResponseFormat = {
 export type LlamaCompletionOptions = {
   messages?: ChatMessage[]
   jinja?: boolean
-  reasoning_format?:
+  reasoning_format?: 'none' | 'auto' | 'deepseek'
   chat_template?: string
   response_format?: CompletionResponseFormat
   tools?: Tool[]
@@ -200,6 +200,13 @@ export type LlamaParallelCompletionOptions = LlamaCompletionOptions & {
    */
   save_state_path?: string
 
+  /**
+   * File path to save prompt-only state to after prompt processing.
+   * Useful for fast prompt reuse (especially for recurrent/hybrid models).
+   * Example: `'/path/to/prompt_state.bin'` or `'file:///path/to/prompt_state.bin'`
+   */
+  save_prompt_state_path?: string
+
   /**
    * Number of tokens to load when loading state.
    * If not specified or <= 0, all tokens from the state file will be loaded.
@@ -363,6 +370,8 @@ export type ModelInfo = {
   nEmbd: number
   nParams: number
   size: number
+  is_recurrent: boolean
+  is_hybrid: boolean
   chatTemplates: {
     llamaChat: boolean
     minja: {
@@ -475,6 +484,7 @@ export interface LlamaContext {
     parallel_tool_calls?: boolean
     tool_choice?: string
     enable_thinking?: boolean
+    reasoning_format?: 'none' | 'auto' | 'deepseek'
     add_generation_prompt?: boolean
     now?: string | number
     chat_template_kwargs?: Record<string, string>
@@ -505,9 +515,20 @@ export interface LlamaContext {
   /**
    * Initialize multimodal support with a mmproj file
    * @param options Object containing path and optional use_gpu flag
+   * @param options.path Path to the multimodal projector model file (mmproj)
+   * @param options.use_gpu Whether to use GPU for multimodal processing (default: true)
+   * @param options.image_min_tokens Minimum number of tokens for image input (for dynamic resolution models)
+   * @param options.image_max_tokens Maximum number of tokens for image input (for dynamic resolution models).
+   * Lower values reduce memory usage and improve speed for high-resolution images.
+   * Recommended: 256-512 for faster inference, up to 4096 for maximum detail.
   * @returns boolean indicating if initialization was successful
   */
-  initMultimodal(options: {
+  initMultimodal(options: {
+    path: string
+    use_gpu?: boolean
+    image_min_tokens?: number
+    image_max_tokens?: number
+  }): boolean
 
  /**
   * Check if multimodal support is enabled
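For orientation, here is a minimal TypeScript sketch of the typing changes above. It assumes a native `LlamaContext` instance is already available (the loader API is not part of this diff) and that `getModelInfo()` is exposed on it as it is used in `lib/index.js` below; the `ChatMessage` shape in the example is an assumption, only the option and field names come from this diff.

import type { LlamaContext, LlamaCompletionOptions } from './binding'

// `ctx` stands in for a context created by the package's loader API,
// which is not shown in this diff.
declare const ctx: LlamaContext

const info = ctx.getModelInfo()
// New ModelInfo fields in 1.4.15: recurrence/hybrid flags next to nEmbd/nParams/size.
console.log('recurrent:', info.is_recurrent, 'hybrid:', info.is_hybrid)

// reasoning_format is now a closed union instead of an open-ended field.
const completionOptions: LlamaCompletionOptions = {
  messages: [{ role: 'user', content: 'Hello' }], // assumed ChatMessage shape
  jinja: true,
  reasoning_format: 'auto', // 'none' | 'auto' | 'deepseek'
}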
package/lib/index.js
CHANGED
@@ -87,7 +87,7 @@ class LlamaContextWrapper {
         return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
     }
     getFormattedChat(messages, template, params) {
-        var _a, _b;
+        var _a, _b, _c;
         const { messages: chat, has_media, media_paths } = (0, utils_1.formatMediaChat)(messages);
         const useJinja = this.isJinjaSupported() && ((_a = params === null || params === void 0 ? void 0 : params.jinja) !== null && _a !== void 0 ? _a : true);
         let tmpl;
@@ -100,6 +100,7 @@ class LlamaContextWrapper {
             parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
             tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
             enable_thinking: (_b = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _b !== void 0 ? _b : true,
+            reasoning_format: (_c = params === null || params === void 0 ? void 0 : params.reasoning_format) !== null && _c !== void 0 ? _c : 'none',
             add_generation_prompt: params === null || params === void 0 ? void 0 : params.add_generation_prompt,
             now: params === null || params === void 0 ? void 0 : params.now,
             chat_template_kwargs: (params === null || params === void 0 ? void 0 : params.chat_template_kwargs)
package/lib/index.ts
CHANGED
@@ -118,6 +118,7 @@ class LlamaContextWrapper {
       parallel_tool_calls?: boolean
       tool_choice?: string
       enable_thinking?: boolean
+      reasoning_format?: 'none' | 'auto' | 'deepseek'
       add_generation_prompt?: boolean
       now?: string | number
       chat_template_kwargs?: Record<string, string>
@@ -136,6 +137,7 @@ class LlamaContextWrapper {
       parallel_tool_calls: params?.parallel_tool_calls,
       tool_choice: params?.tool_choice,
       enable_thinking: params?.enable_thinking ?? true,
+      reasoning_format: params?.reasoning_format ?? 'none',
       add_generation_prompt: params?.add_generation_prompt,
       now: params?.now,
       chat_template_kwargs: params?.chat_template_kwargs
@@ -252,7 +254,12 @@ class LlamaContextWrapper {
     return this.ctx.getLoadedLoraAdapters()
   }
 
-  initMultimodal(options: {
+  initMultimodal(options: {
+    path: string
+    use_gpu?: boolean
+    image_min_tokens?: number
+    image_max_tokens?: number
+  }): boolean {
     return this.ctx.initMultimodal(options)
   }
 
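A short, hedged sketch of how the new `reasoning_format` parameter flows through the wrapper's `getFormattedChat` shown above: the wrapper applies a `'none'` default and forwards the value to the native call. The message shape and the standalone method declaration below are assumptions made for illustration; only the parameter names, the union type, and the default come from this diff.

// Only the method under discussion is typed here; the full wrapper class in
// lib/index.ts has more members and is constructed elsewhere.
type GetFormattedChat = (
  messages: { role: string; content: string }[], // assumed ChatMessage shape
  template?: string,
  params?: {
    jinja?: boolean
    enable_thinking?: boolean
    reasoning_format?: 'none' | 'auto' | 'deepseek'
  },
) => unknown

declare const getFormattedChat: GetFormattedChat // wrapper method, bound elsewhere

// Explicit value: forwarded as-is to the native call.
getFormattedChat(
  [{ role: 'user', content: 'Summarize the changes.' }],
  undefined, // use the model's built-in chat template
  { jinja: true, reasoning_format: 'deepseek' },
)

// Omitted value: the wrapper substitutes 'none' (params?.reasoning_format ?? 'none').
getFormattedChat([{ role: 'user', content: 'Hi' }])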
package/lib/parallel.ts
CHANGED
@@ -1,10 +1,10 @@
 // Parallel decoding API implementation for llama.node
 import type {
   LlamaContext,
-  LlamaCompletionOptions,
   LlamaCompletionToken,
   RerankParams,
   ParallelStatus,
+  LlamaParallelCompletionOptions,
 } from './binding'
 import { formatMediaChat } from './utils'
 
@@ -68,7 +68,7 @@ export class LlamaParallelAPI {
   * @returns Object with requestId, promise for result, and stop function
   */
  async completion(
-    options:
+    options: LlamaParallelCompletionOptions,
    onToken?: (requestId: number, data: LlamaCompletionToken) => void,
  ): Promise<{
    requestId: number
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.4.13",
+  "version": "1.4.15",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-darwin-arm64": "1.4.
-    "@fugood/node-llama-darwin-x64": "1.4.
-    "@fugood/node-llama-linux-arm64": "1.4.
-    "@fugood/node-llama-linux-arm64-cuda": "1.4.
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.
-    "@fugood/node-llama-linux-arm64-vulkan": "1.4.
-    "@fugood/node-llama-linux-x64": "1.4.
-    "@fugood/node-llama-linux-x64-cuda": "1.4.
-    "@fugood/node-llama-linux-x64-vulkan": "1.4.
-    "@fugood/node-llama-win32-arm64": "1.4.
-    "@fugood/node-llama-win32-arm64-vulkan": "1.4.
-    "@fugood/node-llama-win32-x64": "1.4.
-    "@fugood/node-llama-win32-x64-cuda": "1.4.
-    "@fugood/node-llama-win32-x64-vulkan": "1.4.
+    "@fugood/node-llama-darwin-arm64": "1.4.15",
+    "@fugood/node-llama-darwin-x64": "1.4.15",
+    "@fugood/node-llama-linux-arm64": "1.4.15",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.15",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.15",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.15",
+    "@fugood/node-llama-linux-x64": "1.4.15",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.15",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.15",
+    "@fugood/node-llama-win32-arm64": "1.4.15",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.15",
+    "@fugood/node-llama-win32-x64": "1.4.15",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.15",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.15"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch
CHANGED
@@ -1,8 +1,8 @@
 diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
-index
+index 723973ed7..e4b2c6537 100644
 --- a/src/llama.cpp/common/CMakeLists.txt
 +++ b/src/llama.cpp/common/CMakeLists.txt
-@@ -
+@@ -146,4 +146,11 @@ if (LLAMA_LLGUIDANCE)
   set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
  endif ()

@@ -13,11 +13,8 @@ index f7b99159e..fa37fed19 100644
 +else()
 +  set(LLAMA_COMMON_WIN_LIBS "")
 +endif()
-
++
 +target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
-
-#
-# copy the license files
 diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
 index 1bcba9cd8..b7cd68734 100644
 --- a/src/llama.cpp/common/chat-peg-parser.cpp
@@ -32,7 +29,7 @@ index 1bcba9cd8..b7cd68734 100644
  static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
      int count = 0;
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index
+index d531388bc..e6712b368 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
 @@ -7,9 +7,6 @@
@@ -62,7 +59,7 @@ index 22e527bab..c3d0affca 100644
  struct templates_params {
      json messages;
      json tools;
-@@ -
+@@ -753,7 +740,7 @@ static std::string apply(
      tmpl_inputs.extra_context.merge_patch(*additional_context);
      }
      // TODO: add flag to control date/time, if only for testing purposes.
@@ -72,7 +69,7 @@ index 22e527bab..c3d0affca 100644
      minja::chat_template_options tmpl_opts;
      // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
 diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
-index
+index 454085e90..e01390cf9 100644
 --- a/src/llama.cpp/common/chat.h
 +++ b/src/llama.cpp/common/chat.h
 @@ -10,7 +10,18 @@
@@ -96,7 +93,7 @@ index 8bd4a325f..333b3301f 100644
  struct common_chat_tool_call {
      std::string name;
 diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index
+index 744f0b4ee..04fcebb9e 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
 @@ -1361,6 +1361,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
@@ -108,10 +105,10 @@ index 41b2b6833..fe9ba05aa 100644
      mparams.main_gpu = params.main_gpu;
      mparams.split_mode = params.split_mode;
 diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index
+index e60087dea..c21797cd8 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
-@@ -
+@@ -311,6 +311,7 @@ struct lr_opt {
  struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);

  struct common_params {
package/src/LlamaContext.cpp
CHANGED
@@ -595,6 +595,8 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   details.Set("nEmbd", llama_model_n_embd(model));
   details.Set("nParams", llama_model_n_params(model));
   details.Set("size", llama_model_size(model));
+  details.Set("is_recurrent", llama_model_is_recurrent(model));
+  details.Set("is_hybrid", llama_model_is_hybrid(model));
 
   Napi::Object chatTemplates = Napi::Object::New(info.Env());
   chatTemplates.Set("llamaChat", _rn_ctx->validateModelChatTemplate(false, nullptr));
@@ -703,6 +705,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
       get_option<bool>(params, "parallel_tool_calls", false);
   auto tool_choice = get_option<std::string>(params, "tool_choice", "");
   auto enable_thinking = get_option<bool>(params, "enable_thinking", false);
+  auto reasoning_format = get_option<std::string>(params, "reasoning_format", "none");
   auto add_generation_prompt = get_option<bool>(params, "add_generation_prompt", true);
   auto now_str = get_option<std::string>(params, "now", "");
 
@@ -721,7 +724,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   try {
     chatParams = _rn_ctx->getFormattedChatWithJinja(
         messages, chat_template, json_schema_str, tools_str,
-        parallel_tool_calls, tool_choice, enable_thinking,
+        parallel_tool_calls, tool_choice, enable_thinking, reasoning_format,
         add_generation_prompt, now_str, chat_template_kwargs);
   } catch (const nlohmann::json_abi_v3_12_0::detail::parse_error& e) {
     Napi::Error::New(env, e.what()).ThrowAsJavaScriptException();
@@ -962,7 +965,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   try {
     chatParams = _rn_ctx->getFormattedChatWithJinja(
         json_stringify(messages), chat_template,
-        json_schema_str, tools_str, parallel_tool_calls, tool_choice, enable_thinking,
+        json_schema_str, tools_str, parallel_tool_calls, tool_choice, enable_thinking, reasoning_format,
         add_generation_prompt, now_str, chat_template_kwargs);
   } catch (const std::exception &e) {
     Napi::Error::New(env, e.what()).ThrowAsJavaScriptException();
@@ -1330,7 +1333,7 @@ extern "C" void cleanup_logging() {
 }
 
 
-// initMultimodal(options: { path: string, use_gpu?: boolean }): boolean
+// initMultimodal(options: { path: string, use_gpu?: boolean, image_min_tokens?: number, image_max_tokens?: number }): boolean
 Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
 
@@ -1342,6 +1345,15 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
   auto options = info[0].As<Napi::Object>();
   auto mmproj_path = options.Get("path").ToString().Utf8Value();
   auto use_gpu = options.Get("use_gpu").ToBoolean().Value();
+  int image_min_tokens = -1;
+  int image_max_tokens = -1;
+
+  if (options.Has("image_min_tokens") && options.Get("image_min_tokens").IsNumber()) {
+    image_min_tokens = options.Get("image_min_tokens").ToNumber().Int32Value();
+  }
+  if (options.Has("image_max_tokens") && options.Get("image_max_tokens").IsNumber()) {
+    image_max_tokens = options.Get("image_max_tokens").ToNumber().Int32Value();
+  }
 
   if (mmproj_path.empty()) {
     Napi::TypeError::New(env, "mmproj path is required")
@@ -1357,7 +1369,7 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
 
   // Disable ctx_shift before initializing multimodal
   _rn_ctx->params.ctx_shift = false;
-  bool result = _rn_ctx->initMultimodal(mmproj_path, use_gpu);
+  bool result = _rn_ctx->initMultimodal(mmproj_path, use_gpu, image_min_tokens, image_max_tokens);
   if (!result) {
     Napi::Error::New(env, "Failed to initialize multimodal context")
         .ThrowAsJavaScriptException();
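On the JavaScript side, the expanded native parsing above maps directly onto the widened `initMultimodal` options: numeric fields that are omitted stay at the native default of -1. A minimal, hedged sketch; the mmproj path is illustrative and the context creation is not part of this diff.

import type { LlamaContext } from './binding'

declare const ctx: LlamaContext // created elsewhere; loader API not shown in this diff

// Cap image token usage for dynamic-resolution multimodal models.
// Omitting image_min_tokens / image_max_tokens leaves the native values at -1.
const ok = ctx.initMultimodal({
  path: '/models/mmproj.gguf', // illustrative path
  use_gpu: true,
  image_max_tokens: 512, // 256-512 recommended for faster inference (per the doc comment)
})

if (!ok) {
  throw new Error('Failed to initialize multimodal context')
}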
package/src/llama.cpp/CMakeLists.txt
CHANGED
@@ -111,11 +111,16 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
 option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
 
 # 3rd party libs
-option(
-option(
-option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
+option(LLAMA_HTTPLIB "llama: httplib for downloading functionality" ON)
+option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" ON)
 option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
 
+# deprecated
+option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
+if (LLAMA_CURL)
+    message(WARNING "LLAMA_CURL option is deprecated and will be ignored")
+endif()
+
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
@@ -182,6 +187,9 @@ if (NOT MSVC)
     endif()
 endif()
 
+include("cmake/license.cmake")
+license_add_file("llama.cpp" "LICENSE")
+
 #
 # 3rd-party
 #
@@ -209,11 +217,6 @@ add_subdirectory(src)
 # utils, programs, examples and tests
 #
 
-if (NOT LLAMA_BUILD_COMMON)
-    message(STATUS "LLAMA_BUILD_COMMON is OFF, disabling LLAMA_CURL")
-    set(LLAMA_CURL OFF)
-endif()
-
 if (LLAMA_BUILD_COMMON)
     add_subdirectory(common)
     if (LLAMA_HTTPLIB)
@@ -235,6 +238,19 @@ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TOOLS)
     add_subdirectory(tools)
 endif()
 
+# Automatically add all files from the 'licenses' directory
+file(GLOB EXTRA_LICENSES "${CMAKE_SOURCE_DIR}/licenses/LICENSE-*")
+
+foreach(FILE_PATH ${EXTRA_LICENSES})
+    get_filename_component(FILE_NAME "${FILE_PATH}" NAME)
+    string(REGEX REPLACE "^LICENSE-" "" NAME "${FILE_NAME}")
+    license_add_file("${NAME}" "${FILE_PATH}")
+endforeach()
+
+if (LLAMA_BUILD_COMMON)
+    license_generate(common)
+endif()
+
 #
 # install
 #
package/src/llama.cpp/common/CMakeLists.txt
CHANGED
@@ -60,6 +60,8 @@ add_library(${TARGET} STATIC
     common.h
     console.cpp
     console.h
+    debug.cpp
+    debug.h
     download.cpp
     download.h
     http.h
@@ -95,17 +97,7 @@ endif()
 # TODO: use list(APPEND LLAMA_COMMON_EXTRA_LIBS ...)
 set(LLAMA_COMMON_EXTRA_LIBS build_info)
 
-if (
-    # Use curl to download model url
-    find_package(CURL)
-    if (NOT CURL_FOUND)
-        message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
-    endif()
-    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
-    include_directories(${CURL_INCLUDE_DIRS})
-    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
-elseif (LLAMA_HTTPLIB)
-    # otherwise, use cpp-httplib
+if (LLAMA_HTTPLIB)
     target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
 endif()
@@ -162,26 +154,3 @@ else()
 endif()
 
 target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
-
-#
-# copy the license files
-#
-
-# Check if running in GitHub Actions
-if (DEFINED ENV{GITHUB_ACTIONS} AND "$ENV{GITHUB_ACTIONS}" STREQUAL "true")
-    message(STATUS "Running inside GitHub Actions - copying license files")
-
-    # Copy all files from licenses/ to build/bin/
-    file(GLOB LICENSE_FILES "${CMAKE_SOURCE_DIR}/licenses/*")
-    foreach(LICENSE_FILE ${LICENSE_FILES})
-        get_filename_component(FILENAME ${LICENSE_FILE} NAME)
-        add_custom_command(
-            POST_BUILD
-            TARGET ${TARGET}
-            COMMAND ${CMAKE_COMMAND} -E copy_if_different
-                "${LICENSE_FILE}"
-                "$<TARGET_FILE_DIR:llama>/${FILENAME}"
-            COMMENT "Copying ${FILENAME} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
-        message(STATUS "Copying ${LICENSE_FILE} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${FILENAME}")
-    endforeach()
-endif()