@fugood/llama.node 1.3.4 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/CMakeLists.txt +21 -1
  2. package/lib/binding.js +1 -1
  3. package/lib/binding.ts +47 -15
  4. package/lib/index.js +26 -2
  5. package/lib/index.ts +42 -10
  6. package/package.json +15 -14
  7. package/scripts/llama.cpp.patch +31 -10
  8. package/src/LlamaContext.cpp +46 -0
  9. package/src/LlamaContext.h +2 -0
  10. package/src/llama.cpp/common/CMakeLists.txt +2 -0
  11. package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
  12. package/src/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
  13. package/src/llama.cpp/common/chat-parser.h +10 -0
  14. package/src/llama.cpp/common/chat.cpp +461 -87
  15. package/src/llama.cpp/common/chat.h +6 -0
  16. package/src/llama.cpp/common/common.cpp +8 -1
  17. package/src/llama.cpp/common/common.h +12 -5
  18. package/src/llama.cpp/common/json-partial.cpp +19 -2
  19. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -0
  20. package/src/llama.cpp/common/json-schema-to-grammar.h +2 -0
  21. package/src/llama.cpp/common/sampling.cpp +60 -6
  22. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -38
  23. package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
  24. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +15 -5
  25. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -3
  26. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +16 -14
  27. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +49 -48
  28. package/src/llama.cpp/src/llama-grammar.cpp +17 -9
  29. package/src/llama.cpp/src/llama-impl.cpp +3 -3
  30. package/src/llama.cpp/src/llama-sampling.cpp +3 -6
  31. package/src/llama.cpp/src/llama-vocab.cpp +1 -0
@@ -0,0 +1,45 @@
1
+ #pragma once
2
+
3
+ #include "chat.h"
4
+
5
+ #include <nlohmann/json.hpp>
6
+
7
+ #include <optional>
8
+ #include <string>
9
+ #include <vector>
10
+
11
+
12
+ // Delimiter set describing one XML-style tool-call syntax. Sample config (each field below shows the MiniMax-M2 value in the left comment column and the GLM 4.5 value in the right one):
13
+ // MiniMax-M2 (left): <minimax:tool_call>\n<invoke name="tool-name">\n<parameter name="key">value</parameter>\n...</invoke>\n...</minimax:tool_call>
14
+ // GLM 4.5 (right): <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
15
+ struct xml_tool_call_format {
16
+ std::string scope_start; // <minimax:tool_call>\n // \n // can be empty
17
+ std::string tool_start; // <invoke name=\" // <tool_call>
18
+ std::string tool_sep; // \">\n // \n // can be empty only for parse_xml_tool_calls
19
+ std::string key_start; // <parameter name=\" // <arg_key>
20
+ std::string key_val_sep; // \"> // </arg_key>\n<arg_value>
21
+ std::string val_end; // </parameter>\n // </arg_value>\n
22
+ std::string tool_end; // </invoke>\n // </tool_call>\n
23
+ std::string scope_end; // </minimax:tool_call> // // can be empty
24
+ // Set this if there can be dynamic spaces inside key_val_sep.
25
+ // e.g. key_val_sep=</arg_key> key_val_sep2=<arg_value> for GLM4.5
26
+ std::optional<std::string> key_val_sep2 = std::nullopt;
27
+ // Set to true if argval must be a raw string only, e.g. Hello "world" hi
28
+ // Set to false if argval must be a JSON string only, e.g. "Hello \"world\" hi"
29
+ // Defaults to std::nullopt, in which case both forms are allowed.
30
+ std::optional<bool> raw_argval = std::nullopt;
31
+ std::optional<std::string> last_val_end = std::nullopt; // NOTE(review): presumably an alternative to val_end for the final argument of a call; unset by default -- confirm against the parser
32
+ std::optional<std::string> last_tool_end = std::nullopt; // NOTE(review): presumably an alternative to tool_end for the final tool call in a scope; unset by default -- confirm against the parser
33
+ bool trim_raw_argval = false; // NOTE(review): presumably trims surrounding whitespace from raw argument values; off by default -- confirm
34
+ bool allow_toolcall_in_think = false; // TODO: UNTESTED!!!
35
+ };
36
+
37
+ // Make a GBNF grammar that accepts any string except those containing any of the forbidden strings.
38
+ std::string make_gbnf_excluding(std::vector<std::string> forbids);
39
+
40
+ /**
41
+ * Build grammar for xml-style tool call
42
+ * form.scope_start and form.scope_end can be empty.
43
+ * Requires data.format for model-specific hacks.
44
+ */
45
+ void build_grammar_xml_tool_call(common_chat_params & data, const nlohmann::ordered_json & tools, const struct xml_tool_call_format & form);
@@ -1,6 +1,7 @@
1
1
  #pragma once
2
2
 
3
3
  #include "chat.h"
4
+ #include "chat-parser-xml-toolcall.h"
4
5
  #include "json-partial.h"
5
6
  #include "regex-partial.h"
6
7
 
@@ -119,5 +120,14 @@ class common_chat_msg_parser {
119
120
  const std::vector<std::vector<std::string>> & content_paths = {}
120
121
  );
121
122
 
123
+ /**
124
+ * Parse XML-style tool calls for the given xml_tool_call_format. Returns false on invalid syntax and leaves the position untouched.
125
+ * form.scope_start, form.tool_sep and form.scope_end can be empty.
126
+ */
127
+ bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form);
128
+
129
+ // Parse content that uses reasoning and XML-style tool calls.
130
+ void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>");
131
+
122
132
  void clear_tools();
123
133
  };