@fugood/llama.node 1.4.14 → 1.5.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/lib/binding.ts +13 -6
  2. package/lib/index.js +2 -2
  3. package/lib/index.ts +8 -3
  4. package/package.json +15 -15
  5. package/scripts/llama.cpp.patch +77 -65
  6. package/src/LlamaContext.cpp +31 -34
  7. package/src/llama.cpp/CMakeLists.txt +24 -8
  8. package/src/llama.cpp/common/CMakeLists.txt +15 -34
  9. package/src/llama.cpp/common/arg.cpp +59 -10
  10. package/src/llama.cpp/common/chat-parser.cpp +115 -0
  11. package/src/llama.cpp/common/chat.cpp +356 -34
  12. package/src/llama.cpp/common/chat.h +17 -13
  13. package/src/llama.cpp/common/common.cpp +0 -1
  14. package/src/llama.cpp/common/common.h +30 -25
  15. package/src/llama.cpp/common/debug.cpp +165 -0
  16. package/src/llama.cpp/common/debug.h +43 -0
  17. package/src/llama.cpp/common/download.cpp +12 -342
  18. package/src/llama.cpp/common/download.h +6 -0
  19. package/src/llama.cpp/common/jinja/caps.cpp +237 -0
  20. package/src/llama.cpp/common/jinja/caps.h +24 -0
  21. package/src/llama.cpp/common/jinja/lexer.cpp +341 -0
  22. package/src/llama.cpp/common/jinja/lexer.h +157 -0
  23. package/src/llama.cpp/common/jinja/parser.cpp +591 -0
  24. package/src/llama.cpp/common/jinja/parser.h +21 -0
  25. package/src/llama.cpp/common/jinja/runtime.cpp +865 -0
  26. package/src/llama.cpp/common/jinja/runtime.h +628 -0
  27. package/src/llama.cpp/common/jinja/string.cpp +207 -0
  28. package/src/llama.cpp/common/jinja/string.h +58 -0
  29. package/src/llama.cpp/common/jinja/utils.h +49 -0
  30. package/src/llama.cpp/common/jinja/value.cpp +1221 -0
  31. package/src/llama.cpp/common/jinja/value.h +464 -0
  32. package/src/llama.cpp/common/preset.cpp +12 -2
  33. package/src/llama.cpp/common/sampling.cpp +52 -19
  34. package/src/llama.cpp/ggml/include/ggml.h +39 -7
  35. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -0
  36. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -37
  37. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +31 -0
  38. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +18 -0
  39. package/src/llama.cpp/include/llama-cpp.h +3 -1
  40. package/src/llama.cpp/include/llama.h +29 -2
  41. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  42. package/src/llama.cpp/src/llama-adapter.cpp +7 -13
  43. package/src/llama.cpp/src/llama-adapter.h +1 -3
  44. package/src/llama.cpp/src/llama-arch.cpp +35 -0
  45. package/src/llama.cpp/src/llama-arch.h +1 -0
  46. package/src/llama.cpp/src/llama-chat.cpp +20 -0
  47. package/src/llama.cpp/src/llama-chat.h +1 -0
  48. package/src/llama.cpp/src/llama-context.cpp +232 -144
  49. package/src/llama.cpp/src/llama-context.h +10 -0
  50. package/src/llama.cpp/src/llama-cparams.h +2 -0
  51. package/src/llama.cpp/src/llama-graph.cpp +31 -43
  52. package/src/llama.cpp/src/llama-hparams.cpp +0 -36
  53. package/src/llama.cpp/src/llama-hparams.h +38 -1
  54. package/src/llama.cpp/src/llama-kv-cache.cpp +201 -59
  55. package/src/llama.cpp/src/llama-kv-cache.h +0 -2
  56. package/src/llama.cpp/src/llama-mmap.cpp +13 -6
  57. package/src/llama.cpp/src/llama-model-loader.cpp +21 -7
  58. package/src/llama.cpp/src/llama-model.cpp +215 -97
  59. package/src/llama.cpp/src/llama-model.h +3 -2
  60. package/src/llama.cpp/src/llama-sampling.cpp +170 -13
  61. package/src/llama.cpp/src/llama-vocab.cpp +37 -24
  62. package/src/llama.cpp/src/llama-vocab.h +1 -0
  63. package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
  64. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
  65. package/src/llama.cpp/src/models/models.h +13 -2
  66. package/src/llama.cpp/src/models/qwen3next.cpp +198 -182
package/lib/binding.ts CHANGED
@@ -374,24 +374,20 @@ export type ModelInfo = {
374
374
  is_hybrid: boolean
375
375
  chatTemplates: {
376
376
  llamaChat: boolean
377
- minja: {
377
+ jinja: {
378
378
  default: boolean
379
379
  defaultCaps: {
380
380
  tools: boolean
381
381
  toolCalls: boolean
382
- toolResponses: boolean
383
382
  systemRole: boolean
384
383
  parallelToolCalls: boolean
385
- toolCallId: boolean
386
384
  }
387
385
  toolUse: boolean
388
386
  toolUseCaps?: {
389
387
  tools: boolean
390
388
  toolCalls: boolean
391
- toolResponses: boolean
392
389
  systemRole: boolean
393
390
  parallelToolCalls: boolean
394
- toolCallId: boolean
395
391
  }
396
392
  }
397
393
  }
@@ -515,9 +511,20 @@ export interface LlamaContext {
515
511
  /**
516
512
  * Initialize multimodal support with a mmproj file
517
513
  * @param options Object containing path and optional use_gpu flag
514
+ * @param options.path Path to the multimodal projector model file (mmproj)
515
+ * @param options.use_gpu Whether to use GPU for multimodal processing (default: true)
516
+ * @param options.image_min_tokens Minimum number of tokens for image input (for dynamic resolution models)
517
+ * @param options.image_max_tokens Maximum number of tokens for image input (for dynamic resolution models).
518
+ * Lower values reduce memory usage and improve speed for high-resolution images.
519
+ * Recommended: 256-512 for faster inference, up to 4096 for maximum detail.
518
520
  * @returns boolean indicating if initialization was successful
519
521
  */
520
- initMultimodal(options: { path: string; use_gpu?: boolean }): boolean
522
+ initMultimodal(options: {
523
+ path: string
524
+ use_gpu?: boolean
525
+ image_min_tokens?: number
526
+ image_max_tokens?: number
527
+ }): boolean
521
528
 
522
529
  /**
523
530
  * Check if multimodal support is enabled
package/lib/index.js CHANGED
@@ -80,8 +80,8 @@ class LlamaContextWrapper {
80
80
  return this.ctx.getUsedDevices();
81
81
  }
82
82
  isJinjaSupported() {
83
- const { minja } = this.ctx.getModelInfo().chatTemplates;
84
- return !!(minja === null || minja === void 0 ? void 0 : minja.toolUse) || !!(minja === null || minja === void 0 ? void 0 : minja.default);
83
+ const { jinja } = this.ctx.getModelInfo().chatTemplates;
84
+ return !!(jinja === null || jinja === void 0 ? void 0 : jinja.toolUse) || !!(jinja === null || jinja === void 0 ? void 0 : jinja.default);
85
85
  }
86
86
  isLlamaChatSupported() {
87
87
  return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
package/lib/index.ts CHANGED
@@ -100,8 +100,8 @@ class LlamaContextWrapper {
100
100
  }
101
101
 
102
102
  isJinjaSupported(): boolean {
103
- const { minja } = this.ctx.getModelInfo().chatTemplates
104
- return !!minja?.toolUse || !!minja?.default
103
+ const { jinja } = this.ctx.getModelInfo().chatTemplates
104
+ return !!jinja?.toolUse || !!jinja?.default
105
105
  }
106
106
 
107
107
  isLlamaChatSupported(): boolean {
@@ -254,7 +254,12 @@ class LlamaContextWrapper {
254
254
  return this.ctx.getLoadedLoraAdapters()
255
255
  }
256
256
 
257
- initMultimodal(options: { path: string; use_gpu?: boolean }): boolean {
257
+ initMultimodal(options: {
258
+ path: string
259
+ use_gpu?: boolean
260
+ image_min_tokens?: number
261
+ image_max_tokens?: number
262
+ }): boolean {
258
263
  return this.ctx.initMultimodal(options)
259
264
  }
260
265
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@fugood/llama.node",
3
3
  "access": "public",
4
- "version": "1.4.14",
4
+ "version": "1.5.0-rc.0",
5
5
  "description": "An another Node binding of llama.cpp",
6
6
  "main": "lib/index.js",
7
7
  "scripts": {
@@ -72,20 +72,20 @@
72
72
  "CMakeLists.txt"
73
73
  ],
74
74
  "optionalDependencies": {
75
- "@fugood/node-llama-darwin-arm64": "1.4.14",
76
- "@fugood/node-llama-darwin-x64": "1.4.14",
77
- "@fugood/node-llama-linux-arm64": "1.4.14",
78
- "@fugood/node-llama-linux-arm64-cuda": "1.4.14",
79
- "@fugood/node-llama-linux-arm64-snapdragon": "1.4.14",
80
- "@fugood/node-llama-linux-arm64-vulkan": "1.4.14",
81
- "@fugood/node-llama-linux-x64": "1.4.14",
82
- "@fugood/node-llama-linux-x64-cuda": "1.4.14",
83
- "@fugood/node-llama-linux-x64-vulkan": "1.4.14",
84
- "@fugood/node-llama-win32-arm64": "1.4.14",
85
- "@fugood/node-llama-win32-arm64-vulkan": "1.4.14",
86
- "@fugood/node-llama-win32-x64": "1.4.14",
87
- "@fugood/node-llama-win32-x64-cuda": "1.4.14",
88
- "@fugood/node-llama-win32-x64-vulkan": "1.4.14"
75
+ "@fugood/node-llama-darwin-arm64": "1.5.0-rc.0",
76
+ "@fugood/node-llama-darwin-x64": "1.5.0-rc.0",
77
+ "@fugood/node-llama-linux-arm64": "1.5.0-rc.0",
78
+ "@fugood/node-llama-linux-arm64-cuda": "1.5.0-rc.0",
79
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.5.0-rc.0",
80
+ "@fugood/node-llama-linux-arm64-vulkan": "1.5.0-rc.0",
81
+ "@fugood/node-llama-linux-x64": "1.5.0-rc.0",
82
+ "@fugood/node-llama-linux-x64-cuda": "1.5.0-rc.0",
83
+ "@fugood/node-llama-linux-x64-vulkan": "1.5.0-rc.0",
84
+ "@fugood/node-llama-win32-arm64": "1.5.0-rc.0",
85
+ "@fugood/node-llama-win32-arm64-vulkan": "1.5.0-rc.0",
86
+ "@fugood/node-llama-win32-x64": "1.5.0-rc.0",
87
+ "@fugood/node-llama-win32-x64-cuda": "1.5.0-rc.0",
88
+ "@fugood/node-llama-win32-x64-vulkan": "1.5.0-rc.0"
89
89
  },
90
90
  "devDependencies": {
91
91
  "@babel/preset-env": "^7.24.4",
@@ -1,8 +1,8 @@
1
1
  diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
2
- index f7b99159e..fa37fed19 100644
2
+ index ae02c0bd7..f74d8bb26 100644
3
3
  --- a/src/llama.cpp/common/CMakeLists.txt
4
4
  +++ b/src/llama.cpp/common/CMakeLists.txt
5
- @@ -154,8 +154,14 @@ if (LLAMA_LLGUIDANCE)
5
+ @@ -158,4 +158,11 @@ if (LLAMA_LLGUIDANCE)
6
6
  set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
7
7
  endif ()
8
8
 
@@ -13,11 +13,8 @@ index f7b99159e..fa37fed19 100644
13
13
  +else()
14
14
  + set(LLAMA_COMMON_WIN_LIBS "")
15
15
  +endif()
16
-
16
+ +
17
17
  +target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
18
-
19
- #
20
- # copy the license files
21
18
  diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
22
19
  index 1bcba9cd8..b7cd68734 100644
23
20
  --- a/src/llama.cpp/common/chat-peg-parser.cpp
@@ -32,74 +29,89 @@ index 1bcba9cd8..b7cd68734 100644
32
29
  static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
33
30
  int count = 0;
34
31
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
35
- index 22e527bab..c3d0affca 100644
32
+ index b29544dac..5fa2c6c17 100644
36
33
  --- a/src/llama.cpp/common/chat.cpp
37
34
  +++ b/src/llama.cpp/common/chat.cpp
38
- @@ -7,9 +7,6 @@
39
- #include "log.h"
40
- #include "regex-partial.h"
41
-
42
- -#include <minja/chat-template.hpp>
43
- -#include <minja/minja.hpp>
44
- -
45
- #include <algorithm>
46
- #include <cstdio>
47
- #include <cctype>
48
- @@ -135,16 +132,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
49
- return diffs;
35
+ @@ -615,6 +615,37 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
36
+ return tmpls->template_default->source();
50
37
  }
51
38
 
52
- -typedef minja::chat_template common_chat_template;
53
- -
54
- -struct common_chat_templates {
55
- - bool add_bos;
56
- - bool add_eos;
57
- - bool has_explicit_template; // Model had builtin template or template overridde was specified.
58
- - std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
59
- - std::unique_ptr<common_chat_template> template_tool_use;
60
- -};
61
- -
62
- struct templates_params {
63
- json messages;
64
- json tools;
65
- @@ -752,7 +739,7 @@ static std::string apply(
66
- tmpl_inputs.extra_context.merge_patch(*additional_context);
39
+ +common_chat_template_caps common_chat_templates_get_caps(const struct common_chat_templates * tmpls, const std::string & variant) {
40
+ + common_chat_template_caps result;
41
+ + const common_chat_template * tmpl = nullptr;
42
+ +
43
+ + if (!variant.empty() && variant == "tool_use") {
44
+ + tmpl = tmpls->template_tool_use.get();
45
+ + } else {
46
+ + tmpl = tmpls->template_default.get();
47
+ + }
48
+ +
49
+ + if (tmpl) {
50
+ + auto caps = tmpl->original_caps();
51
+ + result.supports_tools = caps.supports_tools;
52
+ + result.supports_tool_calls = caps.supports_tool_calls;
53
+ + result.supports_system_role = caps.supports_system_role;
54
+ + result.supports_parallel_tool_calls = caps.supports_parallel_tool_calls;
55
+ + }
56
+ +
57
+ + return result;
58
+ +}
59
+ +
60
+ +bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant) {
61
+ + if (variant.empty() || variant == "default") {
62
+ + return tmpls->template_default != nullptr;
63
+ + }
64
+ + if (variant == "tool_use") {
65
+ + return tmpls->template_tool_use != nullptr;
66
+ + }
67
+ + return false;
68
+ +}
69
+ +
70
+ common_chat_templates_ptr common_chat_templates_init(
71
+ const struct llama_model * model,
72
+ const std::string & chat_template_override,
73
+ @@ -831,8 +862,9 @@ static std::string apply(
74
+ if (inputs.add_generation_prompt) {
75
+ inp["add_generation_prompt"] = true;
76
+ }
77
+ - if (inp["tools"].is_null()) {
78
+ - inp["tools"] = json::array();
79
+ + // Remove tools key when null, so templates can check "{% if tools is defined %}"
80
+ + if (inp["tools"].is_null() || (inp["tools"].is_array() && inp["tools"].empty())) {
81
+ + inp.erase("tools");
67
82
  }
68
- // TODO: add flag to control date/time, if only for testing purposes.
69
- - // tmpl_inputs.now = std::chrono::system_clock::now();
70
- + tmpl_inputs.now = inputs.now;
71
83
 
72
- minja::chat_template_options tmpl_opts;
73
- // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
84
+ jinja::global_from_json(ctx, inp, inputs.mark_input);
74
85
  diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
75
- index 8bd4a325f..333b3301f 100644
86
+ index 148801738..0317f1ab1 100644
76
87
  --- a/src/llama.cpp/common/chat.h
77
88
  +++ b/src/llama.cpp/common/chat.h
78
- @@ -10,7 +10,18 @@
79
- #include <vector>
80
- #include <map>
89
+ @@ -222,6 +222,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
90
+
91
+ bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
81
92
 
82
- -struct common_chat_templates;
83
- +#include "minja/chat-template.hpp"
84
- +#include "minja/minja.hpp"
93
+ +// Template capabilities structure (for exposing capabilities to external code)
94
+ +struct common_chat_template_caps {
95
+ + bool supports_tools = true;
96
+ + bool supports_tool_calls = true;
97
+ + bool supports_system_role = true;
98
+ + bool supports_parallel_tool_calls = true;
99
+ +};
85
100
  +
86
- +typedef minja::chat_template common_chat_template;
101
+ +// Get template capabilities for a specific variant ("" for default, "tool_use" for tool_use template)
102
+ +common_chat_template_caps common_chat_templates_get_caps(const struct common_chat_templates * tmpls, const std::string & variant = "");
87
103
  +
88
- +struct common_chat_templates {
89
- + bool add_bos;
90
- + bool add_eos;
91
- + bool has_explicit_template; // Model had builtin template or template overridde was specified.
92
- + std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
93
- + std::unique_ptr<common_chat_template> template_tool_use;
94
- +};
95
-
96
- struct common_chat_tool_call {
97
- std::string name;
104
+ +// Check if a template variant exists
105
+ +bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant);
106
+ +
107
+ // Parses a JSON array of messages in OpenAI's chat completion API format.
108
+ // T can be std::string containing JSON or nlohmann::ordered_json
109
+ template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
98
110
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
99
- index 744f0b4ee..04fcebb9e 100644
111
+ index 26250abb6..72ceddcc7 100644
100
112
  --- a/src/llama.cpp/common/common.cpp
101
113
  +++ b/src/llama.cpp/common/common.cpp
102
- @@ -1361,6 +1361,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
114
+ @@ -1360,6 +1360,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
103
115
  mparams.devices = params.devices.data();
104
116
  }
105
117
 
@@ -108,10 +120,10 @@ index 744f0b4ee..04fcebb9e 100644
108
120
  mparams.main_gpu = params.main_gpu;
109
121
  mparams.split_mode = params.split_mode;
110
122
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
111
- index 7794c0268..5b77ae0c3 100644
123
+ index b9566df62..c9425ad2f 100644
112
124
  --- a/src/llama.cpp/common/common.h
113
125
  +++ b/src/llama.cpp/common/common.h
114
- @@ -310,6 +310,7 @@ struct lr_opt {
126
+ @@ -314,6 +314,7 @@ struct lr_opt {
115
127
  struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
116
128
 
117
129
  struct common_params {
@@ -133,10 +145,10 @@ index 7622d0bf4..d2edcfddb 100644
133
145
  check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
134
146
  if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
135
147
  diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
136
- index 365a24b49..83bf4ee62 100644
148
+ index 5b835c11c..681c00504 100644
137
149
  --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
138
150
  +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
139
- @@ -2798,9 +2798,24 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
151
+ @@ -2819,9 +2819,24 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
140
152
  GGML_UNUSED(dev);
141
153
  }
142
154
 
@@ -163,7 +175,7 @@ index 365a24b49..83bf4ee62 100644
163
175
  *total = *free;
164
176
 
165
177
  GGML_UNUSED(dev);
166
- @@ -3010,10 +3025,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
178
+ @@ -3056,10 +3071,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
167
179
  }
168
180
  }
169
181
 
@@ -185,7 +197,7 @@ index 365a24b49..83bf4ee62 100644
185
197
 
186
198
  GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
187
199
 
188
- @@ -3026,6 +3048,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
200
+ @@ -3072,6 +3094,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
189
201
  } catch (const std::exception & exc) {
190
202
  GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
191
203
  devices[i].context = nullptr;
@@ -10,6 +10,7 @@
10
10
  #include "DecodeAudioTokenWorker.h"
11
11
  #include "ggml.h"
12
12
  #include "gguf.h"
13
+ #include "chat.h"
13
14
  #include "json-schema-to-grammar.h"
14
15
  #include <nlohmann/json.hpp>
15
16
  #include "llama-impl.h"
@@ -600,40 +601,27 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
600
601
 
601
602
  Napi::Object chatTemplates = Napi::Object::New(info.Env());
602
603
  chatTemplates.Set("llamaChat", _rn_ctx->validateModelChatTemplate(false, nullptr));
603
- Napi::Object minja = Napi::Object::New(info.Env());
604
- minja.Set("default", _rn_ctx->validateModelChatTemplate(true, nullptr));
604
+ Napi::Object jinja = Napi::Object::New(info.Env());
605
+ jinja.Set("default", _rn_ctx->validateModelChatTemplate(true, nullptr));
605
606
  Napi::Object defaultCaps = Napi::Object::New(info.Env());
606
- auto default_tmpl = _rn_ctx->templates.get()->template_default.get();
607
- auto default_tmpl_caps = default_tmpl->original_caps();
608
- defaultCaps.Set(
609
- "tools",
610
- default_tmpl_caps.supports_tools);
611
- defaultCaps.Set(
612
- "toolCalls",
613
- default_tmpl_caps.supports_tool_calls);
614
- defaultCaps.Set("toolResponses", default_tmpl_caps.supports_tool_responses);
615
- defaultCaps.Set(
616
- "systemRole",
617
- default_tmpl_caps.supports_system_role);
618
- defaultCaps.Set("parallelToolCalls", default_tmpl_caps.supports_parallel_tool_calls);
619
- defaultCaps.Set("toolCallId", default_tmpl_caps.supports_tool_call_id);
620
- minja.Set("defaultCaps", defaultCaps);
621
- minja.Set("toolUse", _rn_ctx->validateModelChatTemplate(true, "tool_use"));
622
- if (_rn_ctx->validateModelChatTemplate(true, "tool_use")) {
607
+ auto default_caps = common_chat_templates_get_caps(_rn_ctx->templates.get(), "");
608
+ defaultCaps.Set("tools", default_caps.supports_tools);
609
+ defaultCaps.Set("toolCalls", default_caps.supports_tool_calls);
610
+ defaultCaps.Set("systemRole", default_caps.supports_system_role);
611
+ defaultCaps.Set("parallelToolCalls", default_caps.supports_parallel_tool_calls);
612
+ jinja.Set("defaultCaps", defaultCaps);
613
+ bool hasToolUse = common_chat_templates_has_variant(_rn_ctx->templates.get(), "tool_use");
614
+ jinja.Set("toolUse", hasToolUse);
615
+ if (hasToolUse) {
623
616
  Napi::Object toolUseCaps = Napi::Object::New(info.Env());
624
- auto tool_use_tmpl = _rn_ctx->templates.get()->template_tool_use.get();
625
- auto tool_use_tmpl_caps = tool_use_tmpl->original_caps();
626
- toolUseCaps.Set(
627
- "tools",
628
- tool_use_tmpl_caps.supports_tools);
629
- toolUseCaps.Set("toolCalls", tool_use_tmpl_caps.supports_tool_calls);
630
- toolUseCaps.Set("toolResponses", tool_use_tmpl_caps.supports_tool_responses);
631
- toolUseCaps.Set("systemRole", tool_use_tmpl_caps.supports_system_role);
632
- toolUseCaps.Set("parallelToolCalls", tool_use_tmpl_caps.supports_parallel_tool_calls);
633
- toolUseCaps.Set("toolCallId", tool_use_tmpl_caps.supports_tool_call_id);
634
- minja.Set("toolUseCaps", toolUseCaps);
635
- }
636
- chatTemplates.Set("minja", minja);
617
+ auto tool_use_caps = common_chat_templates_get_caps(_rn_ctx->templates.get(), "tool_use");
618
+ toolUseCaps.Set("tools", tool_use_caps.supports_tools);
619
+ toolUseCaps.Set("toolCalls", tool_use_caps.supports_tool_calls);
620
+ toolUseCaps.Set("systemRole", tool_use_caps.supports_system_role);
621
+ toolUseCaps.Set("parallelToolCalls", tool_use_caps.supports_parallel_tool_calls);
622
+ jinja.Set("toolUseCaps", toolUseCaps);
623
+ }
624
+ chatTemplates.Set("jinja", jinja);
637
625
  details.Set("chatTemplates", chatTemplates);
638
626
 
639
627
  details.Set("metadata", metadata);
@@ -1333,7 +1321,7 @@ extern "C" void cleanup_logging() {
1333
1321
  }
1334
1322
 
1335
1323
 
1336
- // initMultimodal(options: { path: string, use_gpu?: boolean }): boolean
1324
+ // initMultimodal(options: { path: string, use_gpu?: boolean, image_min_tokens?: number, image_max_tokens?: number }): boolean
1337
1325
  Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
1338
1326
  Napi::Env env = info.Env();
1339
1327
 
@@ -1345,6 +1333,15 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
1345
1333
  auto options = info[0].As<Napi::Object>();
1346
1334
  auto mmproj_path = options.Get("path").ToString().Utf8Value();
1347
1335
  auto use_gpu = options.Get("use_gpu").ToBoolean().Value();
1336
+ int image_min_tokens = -1;
1337
+ int image_max_tokens = -1;
1338
+
1339
+ if (options.Has("image_min_tokens") && options.Get("image_min_tokens").IsNumber()) {
1340
+ image_min_tokens = options.Get("image_min_tokens").ToNumber().Int32Value();
1341
+ }
1342
+ if (options.Has("image_max_tokens") && options.Get("image_max_tokens").IsNumber()) {
1343
+ image_max_tokens = options.Get("image_max_tokens").ToNumber().Int32Value();
1344
+ }
1348
1345
 
1349
1346
  if (mmproj_path.empty()) {
1350
1347
  Napi::TypeError::New(env, "mmproj path is required")
@@ -1360,7 +1357,7 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
1360
1357
 
1361
1358
  // Disable ctx_shift before initializing multimodal
1362
1359
  _rn_ctx->params.ctx_shift = false;
1363
- bool result = _rn_ctx->initMultimodal(mmproj_path, use_gpu);
1360
+ bool result = _rn_ctx->initMultimodal(mmproj_path, use_gpu, image_min_tokens, image_max_tokens);
1364
1361
  if (!result) {
1365
1362
  Napi::Error::New(env, "Failed to initialize multimodal context")
1366
1363
  .ThrowAsJavaScriptException();
@@ -111,11 +111,16 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
111
111
  option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
112
112
 
113
113
  # 3rd party libs
114
- option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
115
- option(LLAMA_HTTPLIB "llama: if libcurl is disabled, use httplib to download model from an URL" ON)
116
- option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
114
+ option(LLAMA_HTTPLIB "llama: httplib for downloading functionality" ON)
115
+ option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" ON)
117
116
  option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
118
117
 
118
+ # deprecated
119
+ option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
120
+ if (LLAMA_CURL)
121
+ message(WARNING "LLAMA_CURL option is deprecated and will be ignored")
122
+ endif()
123
+
119
124
  # Required for relocatable CMake package
120
125
  include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
121
126
  include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
@@ -182,6 +187,9 @@ if (NOT MSVC)
182
187
  endif()
183
188
  endif()
184
189
 
190
+ include("cmake/license.cmake")
191
+ license_add_file("llama.cpp" "LICENSE")
192
+
185
193
  #
186
194
  # 3rd-party
187
195
  #
@@ -209,11 +217,6 @@ add_subdirectory(src)
209
217
  # utils, programs, examples and tests
210
218
  #
211
219
 
212
- if (NOT LLAMA_BUILD_COMMON)
213
- message(STATUS "LLAMA_BUILD_COMMON is OFF, disabling LLAMA_CURL")
214
- set(LLAMA_CURL OFF)
215
- endif()
216
-
217
220
  if (LLAMA_BUILD_COMMON)
218
221
  add_subdirectory(common)
219
222
  if (LLAMA_HTTPLIB)
@@ -235,6 +238,19 @@ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TOOLS)
235
238
  add_subdirectory(tools)
236
239
  endif()
237
240
 
241
+ # Automatically add all files from the 'licenses' directory
242
+ file(GLOB EXTRA_LICENSES "${CMAKE_SOURCE_DIR}/licenses/LICENSE-*")
243
+
244
+ foreach(FILE_PATH ${EXTRA_LICENSES})
245
+ get_filename_component(FILE_NAME "${FILE_PATH}" NAME)
246
+ string(REGEX REPLACE "^LICENSE-" "" NAME "${FILE_NAME}")
247
+ license_add_file("${NAME}" "${FILE_PATH}")
248
+ endforeach()
249
+
250
+ if (LLAMA_BUILD_COMMON)
251
+ license_generate(common)
252
+ endif()
253
+
238
254
  #
239
255
  # install
240
256
  #
@@ -60,6 +60,8 @@ add_library(${TARGET} STATIC
60
60
  common.h
61
61
  console.cpp
62
62
  console.h
63
+ debug.cpp
64
+ debug.h
63
65
  download.cpp
64
66
  download.h
65
67
  http.h
@@ -83,6 +85,18 @@ add_library(${TARGET} STATIC
83
85
  speculative.h
84
86
  unicode.cpp
85
87
  unicode.h
88
+ jinja/lexer.cpp
89
+ jinja/lexer.h
90
+ jinja/parser.cpp
91
+ jinja/parser.h
92
+ jinja/runtime.cpp
93
+ jinja/runtime.h
94
+ jinja/value.cpp
95
+ jinja/value.h
96
+ jinja/string.cpp
97
+ jinja/string.h
98
+ jinja/caps.cpp
99
+ jinja/caps.h
86
100
  )
87
101
 
88
102
  target_include_directories(${TARGET} PUBLIC . ../vendor)
@@ -95,17 +109,7 @@ endif()
95
109
  # TODO: use list(APPEND LLAMA_COMMON_EXTRA_LIBS ...)
96
110
  set(LLAMA_COMMON_EXTRA_LIBS build_info)
97
111
 
98
- if (LLAMA_CURL)
99
- # Use curl to download model url
100
- find_package(CURL)
101
- if (NOT CURL_FOUND)
102
- message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
103
- endif()
104
- target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
105
- include_directories(${CURL_INCLUDE_DIRS})
106
- set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
107
- elseif (LLAMA_HTTPLIB)
108
- # otherwise, use cpp-httplib
112
+ if (LLAMA_HTTPLIB)
109
113
  target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
110
114
  set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
111
115
  endif()
@@ -162,26 +166,3 @@ else()
162
166
  endif()
163
167
 
164
168
  target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
165
-
166
- #
167
- # copy the license files
168
- #
169
-
170
- # Check if running in GitHub Actions
171
- if (DEFINED ENV{GITHUB_ACTIONS} AND "$ENV{GITHUB_ACTIONS}" STREQUAL "true")
172
- message(STATUS "Running inside GitHub Actions - copying license files")
173
-
174
- # Copy all files from licenses/ to build/bin/
175
- file(GLOB LICENSE_FILES "${CMAKE_SOURCE_DIR}/licenses/*")
176
- foreach(LICENSE_FILE ${LICENSE_FILES})
177
- get_filename_component(FILENAME ${LICENSE_FILE} NAME)
178
- add_custom_command(
179
- POST_BUILD
180
- TARGET ${TARGET}
181
- COMMAND ${CMAKE_COMMAND} -E copy_if_different
182
- "${LICENSE_FILE}"
183
- "$<TARGET_FILE_DIR:llama>/${FILENAME}"
184
- COMMENT "Copying ${FILENAME} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
185
- message(STATUS "Copying ${LICENSE_FILE} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${FILENAME}")
186
- endforeach()
187
- endif()