@fugood/llama.node 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/CMakeLists.txt +1 -1
  2. package/lib/binding.js +3 -0
  3. package/lib/binding.ts +2 -0
  4. package/package.json +15 -15
  5. package/scripts/llama.cpp.patch +25 -11
  6. package/src/llama.cpp/CMakeLists.txt +21 -6
  7. package/src/llama.cpp/common/CMakeLists.txt +6 -0
  8. package/src/llama.cpp/common/arg.cpp +65 -16
  9. package/src/llama.cpp/common/chat-parser.cpp +40 -0
  10. package/src/llama.cpp/common/chat-peg-parser.cpp +110 -0
  11. package/src/llama.cpp/common/chat-peg-parser.h +105 -0
  12. package/src/llama.cpp/common/chat.cpp +40 -29
  13. package/src/llama.cpp/common/chat.h +10 -1
  14. package/src/llama.cpp/common/common.cpp +24 -5
  15. package/src/llama.cpp/common/common.h +16 -5
  16. package/src/llama.cpp/common/download.cpp +18 -8
  17. package/src/llama.cpp/common/download.h +3 -1
  18. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  19. package/src/llama.cpp/common/log.cpp +15 -1
  20. package/src/llama.cpp/common/log.h +19 -12
  21. package/src/llama.cpp/common/peg-parser.cpp +1712 -0
  22. package/src/llama.cpp/common/peg-parser.h +459 -0
  23. package/src/llama.cpp/common/unicode.cpp +64 -0
  24. package/src/llama.cpp/common/unicode.h +22 -0
  25. package/src/llama.cpp/ggml/CMakeLists.txt +48 -48
  26. package/src/llama.cpp/ggml/include/ggml.h +7 -2
  27. package/src/llama.cpp/ggml/src/CMakeLists.txt +0 -4
  28. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  29. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +10 -13
  30. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +60 -1
  31. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  32. package/src/llama.cpp/src/llama-arch.cpp +30 -1
  33. package/src/llama.cpp/src/llama-arch.h +3 -0
  34. package/src/llama.cpp/src/llama-graph.cpp +3 -6
  35. package/src/llama.cpp/src/llama-hparams.h +2 -2
  36. package/src/llama.cpp/src/llama-impl.h +1 -1
  37. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  38. package/src/llama.cpp/src/llama-model.cpp +50 -6
  39. package/src/llama.cpp/src/llama-vocab.cpp +1 -2
  40. package/src/llama.cpp/src/models/mistral3.cpp +160 -0
  41. package/src/llama.cpp/src/models/models.h +4 -0
package/CMakeLists.txt CHANGED
@@ -99,7 +99,7 @@ endif()
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)

  if (MINGW)
- add_definitions(-D_WIN32_WINNT=0x0601)
+ add_definitions(-D_WIN32_WINNT=0x0A00)
  endif()

  # VULKAN_SDK
package/lib/binding.js CHANGED
@@ -64,6 +64,9 @@ const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
  /* no-op */
  }
  }
+ const nDev = process.env.GGML_HEXAGON_NDEV;
+ if (!nDev)
+ process.env.GGML_HEXAGON_NDEV = '16';
  }
  let module = yield loadPlatformPackage(packageName);
  if (module) {
package/lib/binding.ts CHANGED
@@ -616,6 +616,8 @@ export const loadModule = async (variant?: LibVariant): Promise<Module> => {
  /* no-op */
  }
  }
+ const nDev = process.env.GGML_HEXAGON_NDEV
+ if (!nDev) process.env.GGML_HEXAGON_NDEV = '16'
  }

  let module = await loadPlatformPackage(packageName)
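The loader change above (mirrored in both lib/binding.js and lib/binding.ts) makes loadModule default GGML_HEXAGON_NDEV to '16' whenever the variable is unset. Callers that need a different Hexagon device count can set the variable before the module is loaded. A minimal sketch, assuming loadModule is re-exported from the package entry point; the import path and the chosen value are illustrative, not part of this diff:

// Minimal sketch (not part of the diff): keep your own GGML_HEXAGON_NDEV value
// by setting it before loadModule() runs; the loader only fills in '16' when
// the variable is unset.
import { loadModule } from '@fugood/llama.node'

export async function initBinding() {
  if (!process.env.GGML_HEXAGON_NDEV) {
    process.env.GGML_HEXAGON_NDEV = '8' // must be set before loadModule()
  }
  return loadModule()
}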
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.4.2",
+ "version": "1.4.3",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -72,20 +72,20 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-darwin-arm64": "1.4.2",
- "@fugood/node-llama-darwin-x64": "1.4.2",
- "@fugood/node-llama-linux-arm64": "1.4.2",
- "@fugood/node-llama-linux-arm64-cuda": "1.4.2",
- "@fugood/node-llama-linux-arm64-snapdragon": "1.4.2",
- "@fugood/node-llama-linux-arm64-vulkan": "1.4.2",
- "@fugood/node-llama-linux-x64": "1.4.2",
- "@fugood/node-llama-linux-x64-cuda": "1.4.2",
- "@fugood/node-llama-linux-x64-vulkan": "1.4.2",
- "@fugood/node-llama-win32-arm64": "1.4.2",
- "@fugood/node-llama-win32-arm64-vulkan": "1.4.2",
- "@fugood/node-llama-win32-x64": "1.4.2",
- "@fugood/node-llama-win32-x64-cuda": "1.4.2",
- "@fugood/node-llama-win32-x64-vulkan": "1.4.2"
+ "@fugood/node-llama-darwin-arm64": "1.4.3",
+ "@fugood/node-llama-darwin-x64": "1.4.3",
+ "@fugood/node-llama-linux-arm64": "1.4.3",
+ "@fugood/node-llama-linux-arm64-cuda": "1.4.3",
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.4.3",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.4.3",
+ "@fugood/node-llama-linux-x64": "1.4.3",
+ "@fugood/node-llama-linux-x64-cuda": "1.4.3",
+ "@fugood/node-llama-linux-x64-vulkan": "1.4.3",
+ "@fugood/node-llama-win32-arm64": "1.4.3",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.4.3",
+ "@fugood/node-llama-win32-x64": "1.4.3",
+ "@fugood/node-llama-win32-x64-cuda": "1.4.3",
+ "@fugood/node-llama-win32-x64-vulkan": "1.4.3"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch CHANGED
@@ -1,8 +1,8 @@
  diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
- index bb168e835..cfc0e2c2e 100644
+ index 377b26846..1873b5206 100644
  --- a/src/llama.cpp/common/CMakeLists.txt
  +++ b/src/llama.cpp/common/CMakeLists.txt
- @@ -143,9 +143,16 @@ if (LLAMA_LLGUIDANCE)
+ @@ -149,9 +149,16 @@ if (LLAMA_LLGUIDANCE)
  set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
  endif ()

@@ -20,8 +20,22 @@ index bb168e835..cfc0e2c2e 100644


  #
+ diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
+ index 74a7b6a46..7b7a1bd50 100644
+ --- a/src/llama.cpp/common/chat-peg-parser.cpp
+ +++ b/src/llama.cpp/common/chat-peg-parser.cpp
+ @@ -1,9 +1,5 @@
+ #include "chat-peg-parser.h"
+
+ -#include <nlohmann/json.hpp>
+ -
+ -using json = nlohmann::json;
+ -
+ static std::string_view trim_trailing_space(std::string_view sv) {
+ while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
+ sv.remove_suffix(1);
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
- index b4a0f985e..2383d2ea9 100644
+ index 41a5bb42d..da5cf4b94 100644
  --- a/src/llama.cpp/common/chat.cpp
  +++ b/src/llama.cpp/common/chat.cpp
  @@ -6,9 +6,6 @@
@@ -34,7 +48,7 @@ index b4a0f985e..2383d2ea9 100644
  #include <algorithm>
  #include <cstdio>
  #include <cctype>
- @@ -126,16 +123,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
+ @@ -134,16 +131,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
  return diffs;
  }

@@ -51,7 +65,7 @@ index b4a0f985e..2383d2ea9 100644
  struct templates_params {
  json messages;
  json tools;
- @@ -709,7 +696,7 @@ static std::string apply(
+ @@ -720,7 +707,7 @@ static std::string apply(
  tmpl_inputs.extra_context.merge_patch(*additional_context);
  }
  // TODO: add flag to control date/time, if only for testing purposes.
@@ -61,10 +75,10 @@ index b4a0f985e..2383d2ea9 100644
  minja::chat_template_options tmpl_opts;
  // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
  diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
- index 754c411e2..71241a6cc 100644
+ index 6085510a4..263076ce2 100644
  --- a/src/llama.cpp/common/chat.h
  +++ b/src/llama.cpp/common/chat.h
- @@ -9,7 +9,18 @@
+ @@ -10,7 +10,18 @@
  #include <vector>
  #include <map>

@@ -85,10 +99,10 @@ index 754c411e2..71241a6cc 100644
  struct common_chat_tool_call {
  std::string name;
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
- index 0d7fd9a93..6bf3cc7ab 100644
+ index f07af1d86..1b10c7b13 100644
  --- a/src/llama.cpp/common/common.cpp
  +++ b/src/llama.cpp/common/common.cpp
- @@ -1217,6 +1217,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
+ @@ -1236,6 +1236,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
  mparams.n_gpu_layers = params.n_gpu_layers;
  }

@@ -97,10 +111,10 @@ index 0d7fd9a93..6bf3cc7ab 100644
  mparams.split_mode = params.split_mode;
  mparams.tensor_split = params.tensor_split;
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
- index 2f23d0baa..e4e6c795e 100644
+ index 179113a4d..78aa24bc3 100644
  --- a/src/llama.cpp/common/common.h
  +++ b/src/llama.cpp/common/common.h
- @@ -299,6 +299,7 @@ struct lr_opt {
+ @@ -302,6 +302,7 @@ struct lr_opt {
  struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);

  struct common_params {
package/src/llama.cpp/CMakeLists.txt CHANGED
@@ -33,10 +33,24 @@ endif()

  option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)

+ option(LLAMA_WASM_MEM64 "llama: use 64-bit memory in WASM builds" ON)
+
  if (EMSCRIPTEN)
  set(BUILD_SHARED_LIBS_DEFAULT OFF)

- option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
+ # Use 64-bit memory to support backend_get_memory queries
+ # TODO: analyze performance impact, see https://spidermonkey.dev/blog/2025/01/15/is-memory64-actually-worth-using
+ if (LLAMA_WASM_MEM64)
+ add_compile_options("-sMEMORY64=1")
+ add_link_options("-sMEMORY64=1")
+ endif()
+ add_link_options("-sALLOW_MEMORY_GROWTH=1")
+
+ option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
+ option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
+ if (LLAMA_BUILD_HTML)
+ set(CMAKE_EXECUTABLE_SUFFIX ".html")
+ endif()
  else()
  if (MINGW)
  set(BUILD_SHARED_LIBS_DEFAULT OFF)
@@ -58,6 +72,12 @@ if (MSVC)
  add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
  endif()

+ if (LLAMA_STANDALONE)
+ # enable parallel builds for msbuild
+ list(APPEND CMAKE_VS_GLOBALS UseMultiToolTask=true)
+ list(APPEND CMAKE_VS_GLOBALS EnforceProcessCountAcrossBuilds=true)
+ endif()
+
  if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
  set(LLAMA_TOOLS_INSTALL_DEFAULT OFF)
  else()
@@ -179,11 +199,6 @@ if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
  # ... otherwise assume ggml is added by a parent CMakeLists.txt
  endif()

- if (MINGW)
- # Target Windows 8 for PrefetchVirtualMemory
- add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
- endif()
-
  #
  # build the library
  #
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -52,6 +52,8 @@ add_library(${TARGET} STATIC
  chat-parser.h
  chat-parser-xml-toolcall.h
  chat-parser-xml-toolcall.cpp
+ chat-peg-parser.cpp
+ chat-peg-parser.h
  chat.cpp
  chat.h
  common.cpp
@@ -69,12 +71,16 @@ add_library(${TARGET} STATIC
  log.h
  ngram-cache.cpp
  ngram-cache.h
+ peg-parser.cpp
+ peg-parser.h
  regex-partial.cpp
  regex-partial.h
  sampling.cpp
  sampling.h
  speculative.cpp
  speculative.h
+ unicode.cpp
+ unicode.h
  )

  if (BUILD_SHARED_LIBS)
package/src/llama.cpp/common/arg.cpp CHANGED
@@ -30,6 +30,7 @@
  #include <thread> // for hardware_concurrency
  #include <vector>

+ #ifndef __EMSCRIPTEN__
  #ifdef __linux__
  #include <linux/limits.h>
  #elif defined(_WIN32)
@@ -41,6 +42,8 @@
  #else
  #include <sys/syslimits.h>
  #endif
+ #endif
+
  #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083

  using json = nlohmann::ordered_json;
@@ -212,13 +215,13 @@ struct handle_model_result {
  static handle_model_result common_params_handle_model(
  struct common_params_model & model,
  const std::string & bearer_token,
- const std::string & model_path_default,
  bool offline) {
  handle_model_result result;
  // handle pre-fill default model path and url based on hf_repo and hf_file
  {
  if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths
  model.path = common_docker_resolve_model(model.docker_repo);
+ model.name = model.docker_repo; // set name for consistency
  } else if (!model.hf_repo.empty()) {
  // short-hand to avoid specifying --hf-file -> default it to --model
  if (model.hf_file.empty()) {
@@ -227,7 +230,8 @@ static handle_model_result common_params_handle_model(
  if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
  exit(1); // built without CURL, error message already printed
  }
- model.hf_repo = auto_detected.repo;
+ model.name = model.hf_repo; // repo name with tag
+ model.hf_repo = auto_detected.repo; // repo name without tag
  model.hf_file = auto_detected.ggufFile;
  if (!auto_detected.mmprojFile.empty()) {
  result.found_mmproj = true;
@@ -257,8 +261,6 @@ static handle_model_result common_params_handle_model(
  model.path = fs_get_cache_file(string_split<std::string>(f, '/').back());
  }

- } else if (model.path.empty()) {
- model.path = model_path_default;
  }
  }

@@ -405,7 +407,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context

  // handle model and download
  {
- auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
+ auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
  if (params.no_mmproj) {
  params.mmproj = {};
  } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
@@ -415,12 +417,18 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
  // only download mmproj if the current example is using it
  for (auto & ex : mmproj_examples) {
  if (ctx_arg.ex == ex) {
- common_params_handle_model(params.mmproj, params.hf_token, "", params.offline);
+ common_params_handle_model(params.mmproj, params.hf_token, params.offline);
  break;
  }
  }
- common_params_handle_model(params.speculative.model, params.hf_token, "", params.offline);
- common_params_handle_model(params.vocoder.model, params.hf_token, "", params.offline);
+ common_params_handle_model(params.speculative.model, params.hf_token, params.offline);
+ common_params_handle_model(params.vocoder.model, params.hf_token, params.offline);
+ }
+
+ // model is required (except for server)
+ // TODO @ngxson : maybe show a list of available models in CLI in this case
+ if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
+ throw std::invalid_argument("error: --model is required\n");
  }

  if (params.escape) {
@@ -1221,7 +1229,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  [](common_params & params) {
  params.warmup = false;
  }
- ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_PERPLEXITY}));
+ ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_PERPLEXITY}));
  add_opt(common_arg(
  {"--spm-infill"},
  string_format(
@@ -2090,11 +2098,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  add_opt(common_arg(
  {"-m", "--model"}, "FNAME",
  ex == LLAMA_EXAMPLE_EXPORT_LORA
- ? std::string("model path from which to load base model")
- : string_format(
- "model path (default: `models/$filename` with filename from `--hf-file` "
- "or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH
- ),
+ ? "model path from which to load base model"
+ : "model path to load",
  [](common_params & params, const std::string & value) {
  params.model.path = value;
  }
@@ -2486,12 +2491,50 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  "path to save slot kv cache (default: disabled)",
  [](common_params & params, const std::string & value) {
  params.slot_save_path = value;
+ if (!fs_is_directory(params.slot_save_path)) {
+ throw std::invalid_argument("not a directory: " + value);
+ }
  // if doesn't end with DIRECTORY_SEPARATOR, add it
  if (!params.slot_save_path.empty() && params.slot_save_path[params.slot_save_path.size() - 1] != DIRECTORY_SEPARATOR) {
  params.slot_save_path += DIRECTORY_SEPARATOR;
  }
  }
  ).set_examples({LLAMA_EXAMPLE_SERVER}));
+ add_opt(common_arg(
+ {"--media-path"}, "PATH",
+ "directory for loading local media files; files can be accessed via file:// URLs using relative paths (default: disabled)",
+ [](common_params & params, const std::string & value) {
+ params.media_path = value;
+ if (!fs_is_directory(params.media_path)) {
+ throw std::invalid_argument("not a directory: " + value);
+ }
+ // if doesn't end with DIRECTORY_SEPARATOR, add it
+ if (!params.media_path.empty() && params.media_path[params.media_path.size() - 1] != DIRECTORY_SEPARATOR) {
+ params.media_path += DIRECTORY_SEPARATOR;
+ }
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}));
+ add_opt(common_arg(
+ {"--models-dir"}, "PATH",
+ "directory containing models for the router server (default: disabled)",
+ [](common_params & params, const std::string & value) {
+ params.models_dir = value;
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
+ add_opt(common_arg(
+ {"--models-max"}, "N",
+ string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max),
+ [](common_params & params, int value) {
+ params.models_max = value;
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX"));
+ add_opt(common_arg(
+ {"--no-models-autoload"},
+ "disables automatic loading of models (default: enabled)",
+ [](common_params & params) {
+ params.models_autoload = false;
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_MODELS_AUTOLOAD"));
  add_opt(common_arg(
  {"--jinja"},
  string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
@@ -2639,7 +2682,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  [](common_params &, const std::string & value) {
  common_log_set_file(common_log_main(), value.c_str());
  }
- ));
+ ).set_env("LLAMA_LOG_FILE"));
  add_opt(common_arg(
  {"--log-colors"}, "[on|off|auto]",
  "Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"
@@ -2674,7 +2717,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  ).set_env("LLAMA_OFFLINE"));
  add_opt(common_arg(
  {"-lv", "--verbosity", "--log-verbosity"}, "N",
- "Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
+ string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n"
+ " - 0: generic output\n"
+ " - 1: error\n"
+ " - 2: warning\n"
+ " - 3: info\n"
+ " - 4: debug\n"
+ "(default: %d)\n", params.verbosity),
  [](common_params & params, int value) {
  params.verbosity = value;
  common_log_set_verbosity_thold(value);
package/src/llama.cpp/common/chat-parser.cpp CHANGED
@@ -1,6 +1,8 @@
  #include "chat-parser.h"
+ #include "chat-peg-parser.h"
  #include "common.h"
  #include "log.h"
+ #include "peg-parser.h"
  #include "regex-partial.h"

  #include <algorithm>
@@ -1483,6 +1485,11 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
  }

  common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+ if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
+ syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
+ syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
+ return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
+ }
  common_chat_msg_parser builder(input, is_partial, syntax);
  try {
  common_chat_parse(builder);
@@ -1500,3 +1507,36 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
  }
  return msg;
  }
+
+ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+ if (parser.empty()) {
+ throw std::runtime_error("Failed to parse due to missing parser definition.");
+ }
+
+ LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
+
+ common_peg_parse_context ctx(input, is_partial);
+ auto result = parser.parse(ctx);
+ if (result.fail()) {
+ throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
+ }
+
+ common_chat_msg msg;
+ msg.role = "assistant";
+
+ if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
+ auto mapper = common_chat_peg_native_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+ } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
+ auto mapper = common_chat_peg_constructed_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+ } else {
+ // Generic mapper
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+ }
+ if (!is_partial) {
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
+ }
+ return msg;
+ }
package/src/llama.cpp/common/chat-peg-parser.cpp ADDED
@@ -0,0 +1,110 @@
+ #include "chat-peg-parser.h"
+
+ static std::string_view trim_trailing_space(std::string_view sv) {
+ while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
+ sv.remove_suffix(1);
+ }
+ return sv;
+ }
+
+ void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
+ arena.visit(result, [this](const common_peg_ast_node & node) {
+ map(node);
+ });
+ }
+
+ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
+ bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
+ bool is_content = node.tag == common_chat_peg_builder::CONTENT;
+
+ if (is_reasoning) {
+ result.reasoning_content = std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_content) {
+ result.content = std::string(trim_trailing_space(node.text));
+ }
+ }
+
+ void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
+ common_chat_peg_mapper::map(node);
+
+ bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
+ bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
+ bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
+ bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
+
+ if (is_tool_open) {
+ result.tool_calls.emplace_back();
+ current_tool = &result.tool_calls.back();
+ }
+
+ if (is_tool_id && current_tool) {
+ current_tool->id = std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_tool_name && current_tool) {
+ current_tool->name = std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_tool_args && current_tool) {
+ current_tool->arguments = std::string(trim_trailing_space(node.text));
+ }
+ }
+
+ void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
+ common_chat_peg_mapper::map(node);
+
+ bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
+ bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
+ bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
+ bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
+ bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
+ bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
+ bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
+ bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
+
+ if (is_tool_open) {
+ result.tool_calls.emplace_back();
+ current_tool = &result.tool_calls.back();
+ arg_count = 0;
+ }
+
+ if (is_tool_name) {
+ current_tool->name = std::string(node.text);
+ current_tool->arguments = "{";
+ }
+
+ if (is_arg_open) {
+ needs_closing_quote = false;
+ }
+
+ if (is_arg_name && current_tool) {
+ if (arg_count > 0) {
+ current_tool->arguments += ",";
+ }
+ current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
+ ++arg_count;
+ }
+
+ if (is_arg_string && current_tool) {
+ // Serialize to JSON, but exclude the end quote
+ std::string dumped = json(node.text).dump();
+ current_tool->arguments += dumped.substr(0, dumped.size() - 1);
+ needs_closing_quote = true;
+ }
+
+ if (is_arg_close && current_tool) {
+ if (needs_closing_quote) {
+ current_tool->arguments += "\"";
+ }
+ }
+
+ if (is_arg_json && current_tool) {
+ current_tool->arguments += std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_tool_close && current_tool) {
+ current_tool->arguments += "}";
+ }
+ }
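For readers following the new constructed mapper above: it assembles each tool call's arguments JSON incrementally, keeping a partially streamed string value open (the "exclude the end quote" step) so the accumulated text is always a valid JSON prefix. The TypeScript sketch below restates that bookkeeping purely as an illustration; the PegEvent type, the string tag names, and the assembleToolCalls helper are hypothetical and not part of this package.

// Illustrative only: a TypeScript restatement of the incremental argument
// assembly performed by common_chat_peg_constructed_mapper above.
type PegEvent =
  | { tag: 'TOOL_OPEN' }
  | { tag: 'TOOL_NAME'; text: string }
  | { tag: 'TOOL_ARG_OPEN' }
  | { tag: 'TOOL_ARG_NAME'; text: string }
  | { tag: 'TOOL_ARG_STRING_VALUE'; text: string }
  | { tag: 'TOOL_ARG_JSON_VALUE'; text: string }
  | { tag: 'TOOL_ARG_CLOSE' }
  | { tag: 'TOOL_CLOSE' }

interface ToolCall { name: string; arguments: string }

export function assembleToolCalls(events: PegEvent[]): ToolCall[] {
  const calls: ToolCall[] = []
  let current: ToolCall | undefined
  let argCount = 0
  let needsClosingQuote = false

  for (const ev of events) {
    switch (ev.tag) {
      case 'TOOL_OPEN': // start a new tool call; arguments are built as a string
        current = { name: '', arguments: '' }
        calls.push(current)
        argCount = 0
        break
      case 'TOOL_NAME':
        if (current) { current.name = ev.text; current.arguments = '{' }
        break
      case 'TOOL_ARG_OPEN':
        needsClosingQuote = false
        break
      case 'TOOL_ARG_NAME':
        if (current) {
          if (argCount > 0) current.arguments += ','
          current.arguments += JSON.stringify(ev.text.trimEnd()) + ':'
          argCount++
        }
        break
      case 'TOOL_ARG_STRING_VALUE':
        if (current) {
          // serialize, but drop the closing quote so a partial value stays open
          const dumped = JSON.stringify(ev.text)
          current.arguments += dumped.slice(0, -1)
          needsClosingQuote = true
        }
        break
      case 'TOOL_ARG_CLOSE':
        if (current && needsClosingQuote) current.arguments += '"'
        break
      case 'TOOL_ARG_JSON_VALUE':
        if (current) current.arguments += ev.text.trimEnd()
        break
      case 'TOOL_CLOSE':
        if (current) current.arguments += '}'
        break
    }
  }
  return calls
}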