@fugood/llama.node 1.3.0-rc.5 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CMakeLists.txt CHANGED
@@ -64,13 +64,23 @@ endif()
  # Improve speed
  if(CMAKE_BUILD_TYPE STREQUAL "Release")
    if (MSVC)
-     if (NOT GGML_VULKAN)
+     # Enable parallel compilation for all MSVC builds
+     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
+     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /MP")
+
+     if (NOT GGML_VULKAN AND NOT GGML_CUDA)
+       # Full optimization with LTCG for default builds
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Ob2 /Oi /Ot /Oy /GL")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /Ob2 /Oi /Ot /Oy /GL")
        set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} /LTCG")
-     else()
+     elseif(GGML_VULKAN)
+       # Reduced optimization for Vulkan builds
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O1 /Ob1 /bigobj")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O1 /Ob1 /bigobj")
+     else()
+       # Faster linking for CUDA builds (no LTCG)
+       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Ob2 /Oi")
+       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /Ob2 /Oi")
      endif()
    else()
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -funroll-loops -flto=auto")
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "@fugood/llama.node",
    "access": "public",
-   "version": "1.3.0-rc.5",
+   "version": "1.3.0",
    "description": "An another Node binding of llama.cpp",
    "main": "lib/index.js",
    "scripts": {
@@ -72,19 +72,19 @@
      "CMakeLists.txt"
    ],
    "optionalDependencies": {
-     "@fugood/node-llama-linux-x64": "1.3.0-rc.5",
-     "@fugood/node-llama-linux-x64-vulkan": "1.3.0-rc.5",
-     "@fugood/node-llama-linux-x64-cuda": "1.3.0-rc.5",
-     "@fugood/node-llama-linux-arm64": "1.3.0-rc.5",
-     "@fugood/node-llama-linux-arm64-vulkan": "1.3.0-rc.5",
-     "@fugood/node-llama-linux-arm64-cuda": "1.3.0-rc.5",
-     "@fugood/node-llama-win32-x64": "1.3.0-rc.5",
-     "@fugood/node-llama-win32-x64-vulkan": "1.3.0-rc.5",
-     "@fugood/node-llama-win32-x64-cuda": "1.3.0-rc.5",
-     "@fugood/node-llama-win32-arm64": "1.3.0-rc.5",
-     "@fugood/node-llama-win32-arm64-vulkan": "1.3.0-rc.5",
-     "@fugood/node-llama-darwin-x64": "1.3.0-rc.5",
-     "@fugood/node-llama-darwin-arm64": "1.3.0-rc.5"
+     "@fugood/node-llama-linux-x64": "1.3.0",
+     "@fugood/node-llama-linux-x64-vulkan": "1.3.0",
+     "@fugood/node-llama-linux-x64-cuda": "1.3.0",
+     "@fugood/node-llama-linux-arm64": "1.3.0",
+     "@fugood/node-llama-linux-arm64-vulkan": "1.3.0",
+     "@fugood/node-llama-linux-arm64-cuda": "1.3.0",
+     "@fugood/node-llama-win32-x64": "1.3.0",
+     "@fugood/node-llama-win32-x64-vulkan": "1.3.0",
+     "@fugood/node-llama-win32-x64-cuda": "1.3.0",
+     "@fugood/node-llama-win32-arm64": "1.3.0",
+     "@fugood/node-llama-win32-arm64-vulkan": "1.3.0",
+     "@fugood/node-llama-darwin-x64": "1.3.0",
+     "@fugood/node-llama-darwin-arm64": "1.3.0"
    },
    "devDependencies": {
      "@babel/preset-env": "^7.24.4",
@@ -21,7 +21,7 @@ index fe290bf8f..d377e29b9 100644
 
  #
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
- index 8587140e1..7931a31a1 100644
+ index 63583fb22..f8be20148 100644
  --- a/src/llama.cpp/common/chat.cpp
  +++ b/src/llama.cpp/common/chat.cpp
  @@ -6,9 +6,6 @@
@@ -31,10 +31,10 @@ index 8587140e1..7931a31a1 100644
  -#include <minja/chat-template.hpp>
  -#include <minja/minja.hpp>
  -
+ #include <algorithm>
  #include <cstdio>
- #include <exception>
- #include <iostream>
- @@ -123,16 +120,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
+ #include <cctype>
+ @@ -126,16 +123,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
  return diffs;
  }
 
@@ -51,18 +51,17 @@ index 8587140e1..7931a31a1 100644
  struct templates_params {
  json messages;
  json tools;
- @@ -807,8 +794,7 @@ static std::string apply(
- if (additional_context) {
+ @@ -812,7 +799,7 @@ static std::string apply(
  tmpl_inputs.extra_context.merge_patch(*additional_context);
  }
- - // TODO: add flag to control date/time, if only for testing purposes.
+ // TODO: add flag to control date/time, if only for testing purposes.
  - // tmpl_inputs.now = std::chrono::system_clock::now();
  + tmpl_inputs.now = inputs.now;
 
  minja::chat_template_options tmpl_opts;
  // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
  diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
- index f7b36ec71..c07429f08 100644
+ index 50efb0d4e..f471a84c7 100644
  --- a/src/llama.cpp/common/chat.h
  +++ b/src/llama.cpp/common/chat.h
  @@ -9,7 +9,18 @@
@@ -98,7 +97,7 @@ index b0591e84b..93759f884 100644
  mparams.split_mode = params.split_mode;
  mparams.tensor_split = params.tensor_split;
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
- index 040a44ebd..37ad69173 100644
+ index a8cb630ea..0919ec5d3 100644
  --- a/src/llama.cpp/common/common.h
  +++ b/src/llama.cpp/common/common.h
  @@ -274,6 +274,7 @@ struct lr_opt {
@@ -110,7 +109,7 @@ index 040a44ebd..37ad69173 100644
  int32_t n_ctx = 4096; // context size
  int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
  diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
- index 42041b717..371752718 100644
+ index 34323afa0..1a6924db0 100644
  --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
@@ -123,10 +122,10 @@ index 42041b717..371752718 100644
  check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
  if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
  diff --git a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
- index 83a83887b..8ae962b29 100644
+ index de01336cd..29b1a043d 100644
  --- a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
  +++ b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
- @@ -112,7 +112,7 @@ if (Vulkan_FOUND)
+ @@ -121,7 +121,7 @@ if (Vulkan_FOUND)
  endif()
 
  # Set up toolchain for host compilation whether cross-compiling or not
@@ -135,7 +134,7 @@ index 83a83887b..8ae962b29 100644
  if (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN)
  set(HOST_CMAKE_TOOLCHAIN_FILE ${GGML_VULKAN_SHADERS_GEN_TOOLCHAIN})
  else()
- @@ -132,7 +132,7 @@ if (Vulkan_FOUND)
+ @@ -141,7 +141,7 @@ if (Vulkan_FOUND)
 
  include(ExternalProject)
 
@@ -3248,7 +3248,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
      ).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
      add_opt(common_arg(
          {"--embd-output-format"}, "FORMAT",
-         "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix",
+         "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix, \"raw\" = plain whitespace-delimited output (one embedding per line)",
          [](common_params & params, const std::string & value) {
              params.embd_out = value;
          }
@@ -3435,7 +3435,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          [](common_params & params) {
              params.use_jinja = true;
          }
-     ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
+     ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
      add_opt(common_arg(
          {"--reasoning-format"}, "FORMAT",
          "controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"
@@ -6,8 +6,11 @@
  #include "log.h"
  #include "regex-partial.h"
 
+ #include <algorithm>
  #include <cstdio>
+ #include <cctype>
  #include <exception>
+ #include <functional>
  #include <iostream>
  #include <optional>
  #include <stdexcept>
@@ -627,6 +630,7 @@ const char * common_chat_format_name(common_chat_format format) {
          case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
          case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
          case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
+         case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
          default:
              throw std::runtime_error("Unknown chat format");
      }
@@ -794,6 +798,7 @@ static std::string apply(
      if (additional_context) {
          tmpl_inputs.extra_context.merge_patch(*additional_context);
      }
+     // TODO: add flag to control date/time, if only for testing purposes.
      tmpl_inputs.now = inputs.now;
 
      minja::chat_template_options tmpl_opts;
@@ -972,6 +977,126 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
      return data;
  }
 
+
+ // Case-insensitive find
+ static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
+     auto it = std::search(
+         haystack.begin() + pos, haystack.end(),
+         needle.begin(), needle.end(),
+         [](char a, char b) { return std::tolower(a) == std::tolower(b); }
+     );
+     return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
+ }
+
+ static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
+     common_chat_params data;
+     const auto is_json_schema_provided = !inputs.json_schema.is_null();
+     const auto is_grammar_provided = !inputs.grammar.empty();
+     const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();
+
+     // the logic requires potentially modifying the messages
+     auto tweaked_messages = inputs.messages;
+
+     auto replace_json_schema_marker = [](json & messages) -> bool {
+         static std::string marker1 = "force json schema.\n";
+         static std::string marker2 = "force json schema.";
+
+         if (messages.empty() || messages.at(0).at("role") != "system") {
+             return false;
+         }
+
+         std::string content = messages.at(0).at("content");
+
+         for (const auto & marker : {marker1, marker2}) {
+             const auto pos = ifind_string(content, marker);
+             if (pos != std::string::npos) {
+                 content.replace(pos, marker.length(), "");
+                 // inject modified content back into the messages
+                 messages.at(0).at("content") = content;
+                 return true;
+             }
+         }
+
+         return false;
+     };
+
+     // Lfm2 model does not natively work with json, but can generally understand the tools structure
+     //
+     // Example of the pytorch dialog structure:
+     // <|startoftext|><|im_start|>system
+     // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
+     // <|im_start|>user
+     // What is the current status of candidate ID 12345?<|im_end|>
+     // <|im_start|>assistant
+     // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
+     // <|im_start|>tool
+     // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
+     // <|im_start|>assistant
+     // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
+     //
+     // For the llama server compatibility with json tools semantic,
+     // the client can add "Follow json schema." line into the system message prompt to force the json output.
+     //
+     if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
+         // server/utils.hpp prohibits that branch for the custom grammar anyways
+         throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
+     } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
+         LOG_INF("%s: Using tools to build a grammar\n", __func__);
+
+         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+             auto schemas = json::array();
+             foreach_function(inputs.tools, [&](const json & tool) {
+                 const auto & function = tool.at("function");
+                 schemas.push_back({
+                     {"type", "object"},
+                     {"properties", {
+                         {"name", {
+                             {"type", "string"},
+                             {"const", function.at("name")},
+                         }},
+                         {"arguments", function.at("parameters")},
+                     }},
+                     {"required", json::array({"name", "arguments", "id"})},
+                 });
+             });
+             auto schema = json {
+                 {"type", "array"},
+                 {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
+                 {"minItems", 1},
+             };
+             if (!inputs.parallel_tool_calls) {
+                 schema["maxItems"] = 1;
+             }
+
+             builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
+         });
+         // model has no concept of tool selection mode choice,
+         // if the system prompt rendered correctly it will produce a tool call
+         // the grammar goes inside the tool call body
+         data.grammar_lazy = true;
+         data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
+         data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
+         data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
+     } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
+         LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
+         // output those tokens
+         data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
+     } else if (is_json_schema_provided) {
+         LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
+         data.grammar = json_schema_to_grammar(inputs.json_schema);
+     } else if (is_grammar_provided) {
+         LOG_INF("%s: Using provided grammar\n", __func__);
+         data.grammar = inputs.grammar;
+     } else {
+         LOG_INF("%s: Using content relying on the template\n", __func__);
+     }
+
+     data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
+     LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());
+
+     return data;
+ }
+
  static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
      common_chat_params data;
      data.prompt = apply(tmpl, inputs);
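Note: when the "force json schema." marker is present, the handler above constrains decoding with a grammar generated from a JSON schema of the tool-call array. A minimal standalone sketch of that schema shape for a single tool (not part of this diff; uses nlohmann::json directly, which llama.cpp's json alias wraps, and the hypothetical get_candidate_status tool from the comment above):

    #include <nlohmann/json.hpp>
    #include <iostream>

    using json = nlohmann::json;

    int main() {
        // Hypothetical tool parameters, mirroring the example in the comment block.
        json parameters = {
            {"type", "object"},
            {"properties", {{"candidate_id", {{"type", "string"}}}}},
            {"required", {"candidate_id"}},
        };

        // Same shape as the schema handed to build_grammar: an array of
        // {name, arguments} objects, capped at one item unless parallel
        // tool calls are requested.
        json tool_call_schema = {
            {"type", "array"},
            {"items", {
                {"type", "object"},
                {"properties", {
                    {"name", {{"type", "string"}, {"const", "get_candidate_status"}}},
                    {"arguments", parameters},
                }},
                {"required", {"name", "arguments", "id"}},
            }},
            {"minItems", 1},
            {"maxItems", 1},
        };

        std::cout << tool_call_schema.dump(2) << std::endl;
        return 0;
    }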
@@ -2485,6 +2610,71 @@ static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
2485
2610
  builder.add_content(builder.consume_rest());
2486
2611
  }
2487
2612
 
2613
+
2614
+ static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
2615
+ if (!builder.syntax().parse_tool_calls) {
2616
+ builder.add_content(builder.consume_rest());
2617
+ return;
2618
+ }
2619
+
2620
+ // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
2621
+ static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
2622
+ static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
2623
+
2624
+ // Loop through all tool calls
2625
+ while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
2626
+ builder.move_to(res->groups[0].end);
2627
+
2628
+ // Parse JSON array format: [{"name": "...", "arguments": {...}}]
2629
+ auto tool_calls_data = builder.consume_json();
2630
+
2631
+ // Consume end marker
2632
+ builder.consume_spaces();
2633
+ if (!builder.try_consume_regex(tool_call_end_regex)) {
2634
+ throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
2635
+ }
2636
+
2637
+ // Process each tool call in the array
2638
+ if (tool_calls_data.json.is_array()) {
2639
+ for (const auto & tool_call : tool_calls_data.json) {
2640
+ if (!tool_call.is_object()) {
2641
+ throw common_chat_msg_partial_exception("Tool call must be an object");
2642
+ }
2643
+
2644
+ if (!tool_call.contains("name")) {
2645
+ throw common_chat_msg_partial_exception("Tool call missing 'name' field");
2646
+ }
2647
+
2648
+ std::string function_name = tool_call.at("name");
2649
+ std::string arguments = "{}";
2650
+
2651
+ if (tool_call.contains("arguments")) {
2652
+ if (tool_call.at("arguments").is_object()) {
2653
+ arguments = tool_call.at("arguments").dump();
2654
+ } else if (tool_call.at("arguments").is_string()) {
2655
+ arguments = tool_call.at("arguments");
2656
+ }
2657
+ }
2658
+
2659
+ if (!builder.add_tool_call(function_name, "", arguments)) {
2660
+ throw common_chat_msg_partial_exception("Incomplete tool call");
2661
+ }
2662
+ }
2663
+ } else {
2664
+ throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
2665
+ }
2666
+
2667
+ // Consume any trailing whitespace after this tool call
2668
+ builder.consume_spaces();
2669
+ }
2670
+
2671
+ // Consume any remaining content after all tool calls
2672
+ auto remaining = builder.consume_rest();
2673
+ if (!string_strip(remaining).empty()) {
2674
+ builder.add_content(remaining);
2675
+ }
2676
+ }
2677
+
2488
2678
  static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
2489
2679
  // Parse thinking tags first - this handles the main reasoning content
2490
2680
  builder.try_parse_reasoning("<seed:think>", "</seed:think>");
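Note: the parser above relies on the common_chat_msg_parser helpers; for illustration only, a hedged standalone sketch of extracting and parsing one LFM2-style tool-call block with plain std::string plus nlohmann::json (assumes nlohmann/json is available; error handling and streaming/partial input are omitted):

    #include <nlohmann/json.hpp>
    #include <iostream>
    #include <string>

    using json = nlohmann::json;

    int main() {
        // Example assistant output in the LFM2 wire format shown above.
        const std::string out =
            "Checking.<|tool_call_start|>"
            "[{\"name\": \"get_current_time\", \"arguments\": {\"location\": \"Paris\"}}]"
            "<|tool_call_end|>";

        const std::string start = "<|tool_call_start|>";
        const std::string end   = "<|tool_call_end|>";

        const auto s = out.find(start);
        const auto e = out.find(end, s);
        if (s == std::string::npos || e == std::string::npos) {
            std::cout << "no tool call\n";
            return 0;
        }

        // The payload between the markers is a JSON array of {name, arguments} objects.
        const auto payload = out.substr(s + start.size(), e - s - start.size());
        for (const auto & call : json::parse(payload)) {
            std::cout << call.at("name").get<std::string>() << " "
                      << call.at("arguments").dump() << "\n";
        }
        return 0;
    }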
@@ -2734,6 +2924,12 @@ static common_chat_params common_chat_templates_apply_jinja(
          return common_chat_params_init_apertus(tmpl, params);
      }
 
+     // LFM2 (w/ tools)
+     if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
+         src.find("]<|tool_list_end|>") != std::string::npos) {
+         return common_chat_params_init_lfm2(tmpl, params);
+     }
+
      // Use generic handler when mixing tools + JSON schema.
      // TODO: support that mix in handlers below.
      if ((params.tools.is_array() && params.json_schema.is_object())) {
@@ -2912,6 +3108,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
          case COMMON_CHAT_FORMAT_APERTUS:
              common_chat_parse_apertus(builder);
              break;
+         case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
+             common_chat_parse_lfm2(builder);
+             break;
          default:
              throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
      }
@@ -127,6 +127,7 @@ enum common_chat_format {
      COMMON_CHAT_FORMAT_SEED_OSS,
      COMMON_CHAT_FORMAT_NEMOTRON_V2,
      COMMON_CHAT_FORMAT_APERTUS,
+     COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
 
      COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
  };
@@ -601,7 +601,10 @@ private:
      }
 
      std::string _resolve_ref(const std::string & ref) {
-         std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
+         auto it = ref.find('#');
+         std::string ref_fragment = it != std::string::npos ? ref.substr(it + 1) : ref;
+         static const std::regex nonalphanumeric_regex(R"([^a-zA-Z0-9-]+)");
+         std::string ref_name = "ref" + std::regex_replace(ref_fragment, nonalphanumeric_regex, "-");
          if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
              _refs_being_resolved.insert(ref);
              json resolved = _refs[ref];
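Note: the rule name is now derived from the whole $ref fragment rather than only its last path segment, so refs that end in the same word no longer collide. A minimal sketch of the same transformation on a hypothetical ref (standalone, not part of the diff):

    #include <iostream>
    #include <regex>
    #include <string>

    int main() {
        const std::string ref = "#/definitions/foo bar";

        // Same transformation as the updated _resolve_ref: keep everything after '#'
        // and squash runs of non [a-zA-Z0-9-] characters into single dashes.
        const auto hash = ref.find('#');
        const std::string fragment = hash != std::string::npos ? ref.substr(hash + 1) : ref;
        static const std::regex nonalnum(R"([^a-zA-Z0-9-]+)");
        const std::string rule_name = "ref" + std::regex_replace(fragment, nonalnum, "-");

        std::cout << rule_name << std::endl;  // ref-definitions-foo-bar
        return 0;
    }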
@@ -774,11 +777,24 @@ public:
          std::vector<std::string> tokens = string_split(pointer, "/");
          for (size_t i = 1; i < tokens.size(); ++i) {
              std::string sel = tokens[i];
-             if (target.is_null() || !target.contains(sel)) {
+             if (target.is_object() && target.contains(sel)) {
+                 target = target[sel];
+             } else if (target.is_array()) {
+                 size_t sel_index;
+                 try {
+                     sel_index = std::stoul(sel);
+                 } catch (const std::invalid_argument & e) {
+                     sel_index = target.size();
+                 }
+                 if (sel_index >= target.size()) {
+                     _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
+                     return;
+                 }
+                 target = target[sel_index];
+             } else {
                  _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
                  return;
              }
-             target = target[sel];
          }
          _refs[ref] = target;
      }
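Note: ref resolution can now step into arrays by treating numeric pointer tokens as indices, so a pointer such as /properties/x/anyOf/0 resolves instead of erroring. A hedged standalone sketch of the same walk over an example schema (uses nlohmann::json; the schema and pointer here are illustrative only):

    #include <nlohmann/json.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    using json = nlohmann::json;

    int main() {
        json schema = json::parse(
            R"({"properties": {"x": {"anyOf": [{"type": "string"}, {"type": "null"}]}}})");

        // Tokens of the JSON pointer "/properties/x/anyOf/0".
        std::vector<std::string> tokens = {"properties", "x", "anyOf", "0"};

        json target = schema;
        for (const auto & sel : tokens) {
            if (target.is_object() && target.contains(sel)) {
                target = target[sel];
            } else if (target.is_array()) {
                size_t idx;
                try {
                    idx = std::stoul(sel);  // non-numeric tokens fall through to the error path
                } catch (const std::invalid_argument &) {
                    idx = target.size();
                }
                if (idx >= target.size()) {
                    std::cerr << "cannot resolve " << sel << "\n";
                    return 1;
                }
                target = target[idx];
            } else {
                std::cerr << "cannot resolve " << sel << "\n";
                return 1;
            }
        }

        std::cout << target.dump() << std::endl;  // {"type":"string"}
        return 0;
    }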
@@ -251,6 +251,8 @@ option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adr
  set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
                                              "gmml: OpenCL API version to target")
 
+ option(GGML_HEXAGON "ggml: enable Hexagon backend" OFF)
+
  # toolchain for vulkan-shaders-gen
  set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
 
@@ -0,0 +1,19 @@
+ #pragma once
+
+ #include "ggml.h"
+ #include "ggml-backend.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ // backend API
+ GGML_BACKEND_API ggml_backend_t ggml_backend_hexagon_init(void);
+
+ GGML_BACKEND_API bool ggml_backend_is_hexagon(ggml_backend_t backend);
+
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_hexagon_reg(void);
+
+ #ifdef __cplusplus
+ }
+ #endif
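Note: this new header only declares the Hexagon backend entry points. A hedged sketch of how a caller might probe for it (not taken from this diff; assumes ggml was configured with -DGGML_HEXAGON=ON and that init returns NULL when the backend is unavailable, which is an assumption):

    #include "ggml-hexagon.h"
    #include <cstdio>

    int main() {
        // ggml_backend_hexagon_init() is declared in the header above.
        ggml_backend_t backend = ggml_backend_hexagon_init();
        if (backend == NULL || !ggml_backend_is_hexagon(backend)) {
            std::printf("Hexagon backend not available\n");
            return 1;
        }
        std::printf("Hexagon backend initialized\n");
        ggml_backend_free(backend);  // standard ggml-backend cleanup
        return 0;
    }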
@@ -307,6 +307,10 @@ function(ggml_add_cpu_backend_variant tag_name)
          foreach (feat ${ARGN})
              set(GGML_INTERNAL_${feat} ON)
          endforeach()
+     elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
+         foreach (feat ${ARGN})
+             set(GGML_INTERNAL_${feat} ON)
+         endforeach()
      endif()
 
      ggml_add_cpu_backend_variant_impl(${tag_name})
@@ -371,6 +375,14 @@ if (GGML_CPU_ALL_VARIANTS)
          else()
              message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
          endif()
+     elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
+         if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+             ggml_add_cpu_backend_variant(s390x_z15 Z15 VXE)
+             # ggml_add_cpu_backend_variant(s390x_z16 Z16 VXE)
+             # ggml_add_cpu_backend_variant(s390x_z17 Z17 VXE)
+         else()
+             message(FATAL_ERROR "Unsupported s390x target OS: ${CMAKE_SYSTEM_NAME}")
+         endif()
      else()
          message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
      endif()
@@ -390,6 +402,7 @@ ggml_add_backend(Vulkan)
  ggml_add_backend(WebGPU)
  ggml_add_backend(zDNN)
  ggml_add_backend(OpenCL)
+ ggml_add_backend(Hexagon)
 
  foreach (target ggml-base ggml)
      target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
@@ -466,29 +466,45 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
          list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
      elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
          message(STATUS "s390x detected")
-         list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c)
-         file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
-         string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
-
-         # TODO: Separation to determine activation of VX/VXE/VXE2
-         if (${S390X_M} MATCHES "8561|8562")
-             message(STATUS "z15 target")
-             list(APPEND ARCH_FLAGS -march=z15)
-         elseif (${S390X_M} MATCHES "3931")
-             message(STATUS "z16 target")
-             list(APPEND ARCH_FLAGS -march=z16)
-         elseif (${S390X_M} MATCHES "9175|9176")
-             # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version.
-             # binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15.
-             message(STATUS "z17 target")
-             list(APPEND ARCH_FLAGS -march=arch15)
-         else()
-             message(STATUS "Unknown target")
-             message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
-             list(APPEND ARCH_FLAGS -march=native -mtune=native)
+         list(APPEND GGML_CPU_SOURCES
+                     ggml-cpu/arch/s390/quants.c)
+
+         # for native compilation
+         if (GGML_NATIVE)
+             # check machine level to determine target
+             file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
+             string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
+
+             # TODO: Separation to determine activation of VX/VXE/VXE2
+             if (${S390X_M} MATCHES "8561|8562")
+                 message(STATUS "z15 target")
+                 list(APPEND ARCH_FLAGS -march=z15)
+             elseif (${S390X_M} MATCHES "3931")
+                 message(STATUS "z16 target")
+                 list(APPEND ARCH_FLAGS -march=z16)
+             elseif (${S390X_M} MATCHES "9175|9176")
+                 # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version.
+                 # binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15.
+                 message(STATUS "z17 target")
+                 list(APPEND ARCH_FLAGS -march=arch15)
+             else()
+                 message(STATUS "Unknown target")
+                 message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
+                 list(APPEND ARCH_FLAGS -march=native -mtune=native)
+             endif()
+         # for cross-compilation
+         elseif(GGML_CPU_ALL_VARIANTS)
+             # range through IBM z15 to z17
+             # NOTE: update when a new hardware level is released
+             foreach (ZHW RANGE 15 17)
+                 if(DEFINED GGML_INTERNAL_Z${ZHW})
+                     message(STATUS "z${ZHW} cross-compile target")
+                     list(APPEND ARCH_FLAGS -march=z${ZHW})
+                 endif()
+             endforeach()
          endif()
 
-         if (GGML_VXE)
+         if (GGML_VXE OR GGML_INTERNAL_VXE)
              message(STATUS "VX/VXE/VXE2 enabled")
              list(APPEND ARCH_FLAGS -mvx -mzvector)
              list(APPEND ARCH_DEFINITIONS GGML_VXE)
@@ -7519,8 +7519,8 @@ static void ggml_compute_forward_upscale_f32(
      float pixel_offset = 0.5f;
      if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
          pixel_offset = 0.0f;
-         sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
-         sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
+         sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0;
+         sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
      }
 
      for (int64_t i3 = 0; i3 < ne3; i3++) {
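Note: the change above guards the align-corners scale factors against single-element dimensions, which previously produced a division by zero. A small standalone sketch of the guarded computation (names ne_out/ne_in stand in for ne0/ne00 and are illustrative only):

    #include <cstdio>
    #include <cstdint>

    // Guarded align-corners scale factor: fall back to the default scale
    // when either side of the dimension has only one element.
    static float align_corners_scale(int64_t ne_out, int64_t ne_in, float default_sf) {
        return (ne_out > 1 && ne_in > 1) ? (float)(ne_out - 1) / (ne_in - 1) : default_sf;
    }

    int main() {
        std::printf("%.2f\n", align_corners_scale(4, 3, 4.0f / 3.0f)); // 1.50 (regular case)
        std::printf("%.2f\n", align_corners_scale(4, 1, 4.0f));        // 4.00 (no divide-by-zero)
        return 0;
    }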